Python rl.memory.SequentialMemory() Examples
The following are 30
code examples of rl.memory.SequentialMemory().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
rl.memory
, or try the search function.
Example #1
Source File: train_agent_kerasrl.py From gym-malware with MIT License | 7 votes |
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    """Train a dueling double-DQN agent on the malware gym environment.

    Args:
        layers: hidden-layer sizes for the dense policy network.
        rounds: number of environment steps to train for.
        run_test: if True, also evaluate on the held-out test environment.
        use_score: if True, use the score-based environment variant.

    Returns:
        Tuple ``(agent, model, history_train, history_test)``;
        ``history_test`` is None unless ``run_test`` is set.
    """
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # Generate a policy model.
    model = generate_dense_model(
        (window_length,) + env.observation_space.shape, layers, nb_actions)

    # BoltzmannQPolicy selects an action stochastically with a probability
    # generated by soft-maxing Q values.
    policy = BoltzmannQPolicy()

    # Replay memory helps the model during training; we only consider a
    # single malware sample (window_length=1) for each "experience".
    memory = SequentialMemory(limit=32, ignore_episode_boundaries=False,
                              window_length=window_length)

    # DQN agent as described in Mnih (2013) and Mnih (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                     nb_steps_warmup=16, enable_double_dqn=True,
                     enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)

    # keras-rl allows one to use any built-in keras optimizer.
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # Play the game. Learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)
    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment.
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)
        # Evaluate the agent on a few episodes, drawing randomly from the
        # test samples.
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test
Example #2
Source File: test_continuous.py From keras-rl2 with MIT License | 6 votes |
def test_ddpg():
    """Smoke-test DDPG end-to-end on Pendulum-v0."""
    # TODO: replace this with a simpler environment where we can actually
    # test if it finds a solution.
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99,
                      target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env, nb_steps=400, visualize=False, verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False,
                   nb_max_episode_steps=100)
    # TODO: evaluate history
Example #3
Source File: test_dqn.py From keras-rl with MIT License | 6 votes |
def test_single_continuous_dqn_input():
    """NAF (continuous DQN) agent accepts a single observation input."""
    nb_actions = 2

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(2, 3)))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(2, 3)))
    mu_model.add(Dense(nb_actions))

    L_input = Input(shape=(2, 3))
    L_input_action = Input(shape=(nb_actions,))
    x = Concatenate()([Flatten()(L_input), L_input_action])
    # Lower-triangular L matrix has n*(n+1)/2 free entries.
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[L_input_action, L_input], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=5,
                     batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #4
Source File: test_ddpg.py From keras-rl with MIT License | 6 votes |
def test_single_ddpg_input():
    """DDPG agent accepts a single observation input."""
    nb_actions = 2

    actor = Sequential()
    actor.add(Flatten(input_shape=(2, 3)))
    actor.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(2, 3), name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(1)(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    agent = DDPGAgent(actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_actions=2, nb_steps_warmup_critic=5,
                      nb_steps_warmup_actor=5, batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #5
Source File: test_continuous.py From keras-rl with MIT License | 6 votes |
def test_ddpg():
    """Smoke-test DDPG end-to-end on Pendulum-v0."""
    # TODO: replace this with a simpler environment where we can actually
    # test if it finds a solution.
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99,
                      target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env, nb_steps=400, visualize=False, verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False,
                   nb_max_episode_steps=100)
    # TODO: evaluate history
Example #6
Source File: test_discrete.py From keras-rl with MIT License | 6 votes |
def test_duel_dqn():
    """Dueling DQN solves TwoRoundDeterministicRewardEnv (mean reward 3)."""
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-1,
                   policy=policy, enable_double_dqn=False,
                   enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.  # greedy evaluation
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #7
Source File: agent_custom_q1.py From neuron_poker with MIT License | 6 votes |
def initiate_agent(self, nb_actions):
    """initiate a deep Q agent"""
    self.model = Sequential()
    # NOTE(review): `env`, `memory_limit` and `window_length` appear to come
    # from the enclosing module — confirm against the full source file.
    self.model.add(Dense(512, activation='relu',
                         input_shape=env.observation_space))  # pylint: disable=no-member
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(nb_actions, activation='linear'))

    # Finally, we configure and compile our agent. You can use every
    # built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)  # pylint: disable=unused-variable
    policy = TrumpPolicy()  # pylint: disable=unused-variable
Example #8
Source File: test_discrete.py From keras-rl2 with MIT License | 6 votes |
def test_duel_dqn():
    """Dueling DQN solves TwoRoundDeterministicRewardEnv (mean reward 3)."""
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-1,
                   policy=policy, enable_double_dqn=False,
                   enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.  # greedy evaluation
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #9
Source File: test_discrete.py From keras-rl2 with MIT License | 6 votes |
def test_double_dqn():
    """Double DQN solves TwoRoundDeterministicRewardEnv (mean reward 3)."""
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-1,
                   policy=policy, enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.  # greedy evaluation
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #10
Source File: test_dqn.py From keras-rl2 with MIT License | 6 votes |
def test_single_continuous_dqn_input():
    """NAF (continuous DQN) agent accepts a single observation input."""
    nb_actions = 2

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(2, 3)))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(2, 3)))
    mu_model.add(Dense(nb_actions))

    L_input = Input(shape=(2, 3))
    L_input_action = Input(shape=(nb_actions,))
    x = Concatenate()([Flatten()(L_input), L_input_action])
    # Lower-triangular L matrix has n*(n+1)/2 free entries.
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[L_input_action, L_input], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=5,
                     batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #11
Source File: test_ddpg.py From keras-rl2 with MIT License | 6 votes |
def test_single_ddpg_input():
    """DDPG agent accepts a single observation input."""
    nb_actions = 2

    actor = Sequential()
    actor.add(Flatten(input_shape=(2, 3)))
    actor.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(2, 3), name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(1)(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    agent = DDPGAgent(actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_actions=2, nb_steps_warmup_critic=5,
                      nb_steps_warmup_actor=5, batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #12
Source File: dqn_breakout_test.py From Deep-Learning-Quick-Reference with MIT License | 6 votes |
def main():
    """Evaluate a pre-trained DQN on BreakoutDeterministic-v4.

    Fix: the original comments said the weights were being *saved* and that
    evaluation ran for 5 episodes; the code actually *loads* pre-trained
    weights and evaluates for 10 episodes.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    model = build_model(INPUT_SHAPE, num_actions)

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=1000000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    # Load the final weights from a previous training run.
    dqn.load_weights('dqn_BreakoutDeterministic-v4_weights_1750000.h5f')

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #13
Source File: agent_keras_rl_dqn.py From neuron_poker with MIT License | 5 votes |
def initiate_agent(self, env):
    """initiate a deep Q agent

    Fix: the original recomputed ``nb_actions = env.action_space.n`` a
    second time just before constructing the agent even though the value
    was already computed from ``self.env`` (the same object); the redundant
    assignment is removed.
    """
    tf.compat.v1.disable_eager_execution()

    self.env = env
    nb_actions = self.env.action_space.n

    self.model = Sequential()
    self.model.add(Dense(512, activation='relu',
                         input_shape=env.observation_space))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(nb_actions, activation='linear'))

    # Finally, we configure and compile our agent. You can use every
    # built-in Keras optimizer and even the metrics!
    # NOTE(review): memory_limit, window_length, nb_steps_warmup, batch_size,
    # train_interval and enable_double_dqn presumably come from the enclosing
    # module — confirm against the full source file.
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions,
                        memory=memory, nb_steps_warmup=nb_steps_warmup,
                        target_model_update=1e-2, policy=policy,
                        processor=CustomProcessor(), batch_size=batch_size,
                        train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])
Example #14
Source File: test_ddpg.py From keras-rl with MIT License | 5 votes |
def test_multi_ddpg_input():
    """DDPG agent accepts two observation inputs via MultiInputProcessor.

    Fix: the original assigned ``actor = Sequential()`` and then immediately
    rebound ``actor`` to a functional ``Model`` without ever using the
    Sequential instance; the dead assignment is removed.
    """
    nb_actions = 2

    actor_observation_input1 = Input(shape=(2, 3),
                                     name='actor_observation_input1')
    actor_observation_input2 = Input(shape=(2, 4),
                                     name='actor_observation_input2')
    x = Concatenate()([actor_observation_input1, actor_observation_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    actor = Model(inputs=[actor_observation_input1, actor_observation_input2],
                  outputs=x)

    action_input = Input(shape=(nb_actions,), name='action_input')
    critic_observation_input1 = Input(shape=(2, 3),
                                      name='critic_observation_input1')
    critic_observation_input2 = Input(shape=(2, 4),
                                      name='critic_observation_input2')
    x = Concatenate()([critic_observation_input1, critic_observation_input2])
    x = Concatenate()([action_input, Flatten()(x)])
    x = Dense(1)(x)
    critic = Model(inputs=[action_input, critic_observation_input1,
                           critic_observation_input2], outputs=x)

    processor = MultiInputProcessor(nb_inputs=2)
    memory = SequentialMemory(limit=10, window_length=2)
    agent = DDPGAgent(actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_actions=2, nb_steps_warmup_critic=5,
                      nb_steps_warmup_actor=5, batch_size=4,
                      processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #15
Source File: agent_keras_rl_dqn.py From neuron_poker with MIT License | 5 votes |
def play(self, nb_episodes=5, render=False):
    """Let the agent play"""
    # NOTE(review): memory_limit, window_length, nb_steps_warmup, batch_size,
    # train_interval and enable_double_dqn presumably come from the enclosing
    # module — confirm against the full source file.
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    class CustomProcessor(Processor):  # pylint: disable=redefined-outer-name
        """The agent and the environment"""

        def process_state_batch(self, batch):
            """
            Given a state batch, I want to remove the second dimension, because it's
            useless and prevents me from feeding the tensor into my CNN
            """
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            processed_info = info['player_data']
            if 'stack' in processed_info:
                processed_info = {'x': 1}
            return processed_info

    nb_actions = self.env.action_space.n

    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions,
                        memory=memory, nb_steps_warmup=nb_steps_warmup,
                        target_model_update=1e-2, policy=policy,
                        processor=CustomProcessor(), batch_size=batch_size,
                        train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn.compile(tf.optimizers.Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member

    self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
Example #16
Source File: dqn_lunar_lander.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Train a DQN on LunarLander-v2, save the weights, then evaluate."""
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=0.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2,
            callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #17
Source File: dqn_breakout.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Train a DQN on BreakoutDeterministic-v4, save weights, then evaluate.

    Fix: the original computed ``input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE``
    but never used it (``build_model`` receives ``INPUT_SHAPE`` directly);
    the unused local is removed.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    model = build_model(INPUT_SHAPE, num_actions)

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=1000000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=1750000, log_interval=10000, visualize=False,
            verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #18
Source File: test_discrete.py From keras-rl with MIT License | 5 votes |
def test_double_dqn():
    """Double DQN solves TwoRoundDeterministicRewardEnv (mean reward 3)."""
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-1,
                   policy=policy, enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.  # greedy evaluation
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #19
Source File: dqn_cartpole.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Train a DQN on CartPole-v0, save the weights, then evaluate."""
    ENV_NAME = 'CartPole-v0'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2,
            callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #20
Source File: dqn_lunar_lander_test.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Evaluate a pre-trained DQN on LunarLander-v2.

    Fix: the original comments said the weights were being *saved* and that
    evaluation ran for 5 episodes; the code actually *loads* pre-trained
    weights and evaluates for 10 episodes.
    """
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    # Load the final weights from a previous training run.
    dqn.load_weights('dqn_LunarLander-v2_weights_510000.h5f')

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #21
Source File: test_continuous.py From keras-rl with MIT License | 5 votes |
def test_cdqn():
    """Smoke-test NAF (continuous DQN) end-to-end on Pendulum-v0."""
    # TODO: replace this with a simpler environment where we can actually
    # test if it finds a solution.
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    # Lower-triangular L matrix has n*(n+1)/2 free entries.
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=50,
                     random_process=random_process, gamma=.99,
                     target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=400, visualize=False, verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False,
                   nb_max_episode_steps=100)
    # TODO: evaluate history
Example #22
Source File: test_dqn.py From keras-rl with MIT License | 5 votes |
def test_multi_continuous_dqn_input():
    """NAF agent accepts two observation inputs via MultiInputProcessor."""
    nb_actions = 2

    V_input1 = Input(shape=(2, 3))
    V_input2 = Input(shape=(2, 4))
    x = Concatenate()([V_input1, V_input2])
    x = Flatten()(x)
    x = Dense(1)(x)
    V_model = Model(inputs=[V_input1, V_input2], outputs=x)

    mu_input1 = Input(shape=(2, 3))
    mu_input2 = Input(shape=(2, 4))
    x = Concatenate()([mu_input1, mu_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    mu_model = Model(inputs=[mu_input1, mu_input2], outputs=x)

    L_input1 = Input(shape=(2, 3))
    L_input2 = Input(shape=(2, 4))
    L_input_action = Input(shape=(nb_actions,))
    x = Concatenate()([L_input1, L_input2])
    x = Concatenate()([Flatten()(x), L_input_action])
    # Lower-triangular L matrix has n*(n+1)/2 free entries.
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[L_input_action, L_input1, L_input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=5,
                     batch_size=4, processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #23
Source File: test_dqn.py From keras-rl with MIT License | 5 votes |
def test_multi_dqn_input():
    """DQN agent (plain and double) accepts two observation inputs."""
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2,
                         nb_steps_warmup=5, batch_size=4,
                         processor=processor,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #24
Source File: test_memory.py From keras-rl with MIT License | 5 votes |
def test_training_flag():
    """Entries are only stored when training=True; recent-state lookup works
    either way, and a terminal flag resets the recent-observation window."""
    obs_size = (3, 4)
    obs0 = np.random.random(obs_size)
    terminal0 = False
    obs1 = np.random.random(obs_size)
    terminal1 = True
    obs2 = np.random.random(obs_size)
    terminal2 = False

    for training in (True, False):
        memory = SequentialMemory(3, window_length=2)

        state = np.array(memory.get_recent_state(obs0))
        assert state.shape == (2,) + obs_size
        assert np.allclose(state[0], 0.)
        assert np.all(state[1] == obs0)
        assert memory.nb_entries == 0

        memory.append(obs0, 0, 0., terminal1, training=training)
        state = np.array(memory.get_recent_state(obs1))
        assert state.shape == (2,) + obs_size
        assert np.all(state[0] == obs0)
        assert np.all(state[1] == obs1)
        if training:
            assert memory.nb_entries == 1
        else:
            assert memory.nb_entries == 0

        memory.append(obs1, 0, 0., terminal2, training=training)
        state = np.array(memory.get_recent_state(obs2))
        assert state.shape == (2,) + obs_size
        assert np.allclose(state[0], 0.)
        assert np.all(state[1] == obs2)
        if training:
            assert memory.nb_entries == 2
        else:
            assert memory.nb_entries == 0
Example #25
Source File: keras_ddpg.py From costar_plan with Apache License 2.0 | 5 votes |
def __init__(self, env, *args, **kwargs):
    """Build actor/critic networks and the DDPG agent for *env*.

    Fix: the original used Python 2 ``print`` statements, which are a
    SyntaxError under Python 3; both are now ``print()`` calls.
    """
    super(KerasDDPGAgent, self).__init__(*args, **kwargs)
    self.env = env

    # assert len(env.action_space.shape) == 1
    # TODO: is there a way to output a tuple (6,1)
    nb_actions = sum(sum(1 for i in row if i)
                     for row in self.env.action_space.sample())

    # TODO: terminology? feature or observation?
    observation = env.reset()
    print(">>>>>>>>>>>>>>>>>>>", observation.shape)

    # TODO: find a way to customize network
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + observation.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('tanh'))
    actor.add(Lambda(lambda x: x * 3.14159))  # scale tanh output to +/- pi
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + observation.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    # NOTE(review): `merge(..., mode='concat')` and the `input=`/`output=`
    # keywords are Keras 1 APIs, removed in Keras 2 (use Concatenate and
    # `inputs=`/`outputs=`). Kept as-is because this file appears to target
    # the old API — confirm the installed Keras version.
    x = merge([action_input, flattened_observation], mode='concat')
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(input=[action_input, observation_input], output=x)
    print(critic.summary())

    memory = SequentialMemory(limit=500000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15,
                                              mu=0., sigma=.3)
    self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                           critic_action_input=action_input, memory=memory,
                           nb_steps_warmup_critic=1000,
                           nb_steps_warmup_actor=1000,
                           random_process=random_process, gamma=.99,
                           target_model_update=1e-3)
    self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example #26
Source File: test_continuous.py From keras-rl2 with MIT License | 5 votes |
def test_cdqn():
    """Smoke-test NAF (continuous DQN) end-to-end on Pendulum-v0."""
    # TODO: replace this with a simpler environment where we can actually
    # test if it finds a solution.
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    # Lower-triangular L matrix has n*(n+1)/2 free entries.
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=50,
                     random_process=random_process, gamma=.99,
                     target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=400, visualize=False, verbose=0,
              nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False,
                   nb_max_episode_steps=100)
    # TODO: evaluate history
Example #27
Source File: test_dqn.py From keras-rl2 with MIT License | 5 votes |
def test_multi_continuous_dqn_input():
    """NAF agent accepts two observation inputs via MultiInputProcessor."""
    nb_actions = 2

    V_input1 = Input(shape=(2, 3))
    V_input2 = Input(shape=(2, 4))
    x = Concatenate()([V_input1, V_input2])
    x = Flatten()(x)
    x = Dense(1)(x)
    V_model = Model(inputs=[V_input1, V_input2], outputs=x)

    mu_input1 = Input(shape=(2, 3))
    mu_input2 = Input(shape=(2, 4))
    x = Concatenate()([mu_input1, mu_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    mu_model = Model(inputs=[mu_input1, mu_input2], outputs=x)

    L_input1 = Input(shape=(2, 3))
    L_input2 = Input(shape=(2, 4))
    L_input_action = Input(shape=(nb_actions,))
    x = Concatenate()([L_input1, L_input2])
    x = Concatenate()([Flatten()(x), L_input_action])
    # Lower-triangular L matrix has n*(n+1)/2 free entries.
    x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
    L_model = Model(inputs=[L_input_action, L_input1, L_input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=5,
                     batch_size=4, processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #28
Source File: test_dqn.py From keras-rl2 with MIT License | 5 votes |
def test_multi_dqn_input():
    """DQN agent (plain and double) accepts two observation inputs."""
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2,
                         nb_steps_warmup=5, batch_size=4,
                         processor=processor,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #29
Source File: test_ddpg.py From keras-rl2 with MIT License | 5 votes |
def test_multi_ddpg_input():
    """DDPG agent accepts two observation inputs via MultiInputProcessor.

    Fix: the original assigned ``actor = Sequential()`` and then immediately
    rebound ``actor`` to a functional ``Model`` without ever using the
    Sequential instance; the dead assignment is removed.
    """
    nb_actions = 2

    actor_observation_input1 = Input(shape=(2, 3),
                                     name='actor_observation_input1')
    actor_observation_input2 = Input(shape=(2, 4),
                                     name='actor_observation_input2')
    x = Concatenate()([actor_observation_input1, actor_observation_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    actor = Model(inputs=[actor_observation_input1, actor_observation_input2],
                  outputs=x)

    action_input = Input(shape=(nb_actions,), name='action_input')
    critic_observation_input1 = Input(shape=(2, 3),
                                      name='critic_observation_input1')
    critic_observation_input2 = Input(shape=(2, 4),
                                      name='critic_observation_input2')
    x = Concatenate()([critic_observation_input1, critic_observation_input2])
    x = Concatenate()([action_input, Flatten()(x)])
    x = Dense(1)(x)
    critic = Model(inputs=[action_input, critic_observation_input1,
                           critic_observation_input2], outputs=x)

    processor = MultiInputProcessor(nb_inputs=2)
    memory = SequentialMemory(limit=10, window_length=2)
    agent = DDPGAgent(actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_actions=2, nb_steps_warmup_critic=5,
                      nb_steps_warmup_actor=5, batch_size=4,
                      processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #30
Source File: test_memory.py From keras-rl2 with MIT License | 5 votes |
def test_training_flag():
    """Entries are only stored when training=True; recent-state lookup works
    either way, and a terminal flag resets the recent-observation window."""
    obs_size = (3, 4)
    obs0 = np.random.random(obs_size)
    terminal0 = False
    obs1 = np.random.random(obs_size)
    terminal1 = True
    obs2 = np.random.random(obs_size)
    terminal2 = False

    for training in (True, False):
        memory = SequentialMemory(3, window_length=2)

        state = np.array(memory.get_recent_state(obs0))
        assert state.shape == (2,) + obs_size
        assert np.allclose(state[0], 0.)
        assert np.all(state[1] == obs0)
        assert memory.nb_entries == 0

        memory.append(obs0, 0, 0., terminal1, training=training)
        state = np.array(memory.get_recent_state(obs1))
        assert state.shape == (2,) + obs_size
        assert np.all(state[0] == obs0)
        assert np.all(state[1] == obs1)
        if training:
            assert memory.nb_entries == 1
        else:
            assert memory.nb_entries == 0

        memory.append(obs1, 0, 0., terminal2, training=training)
        state = np.array(memory.get_recent_state(obs2))
        assert state.shape == (2,) + obs_size
        assert np.allclose(state[0], 0.)
        assert np.all(state[1] == obs2)
        if training:
            assert memory.nb_entries == 2
        else:
            assert memory.nb_entries == 0