Python rl.policy.EpsGreedyQPolicy() Examples
The following are 17 code examples of rl.policy.EpsGreedyQPolicy(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out all available functions and classes of the module rl.policy.
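EpsGreedyQPolicy implements epsilon-greedy action selection: with probability eps the agent takes a uniformly random action, otherwise it takes the action with the highest predicted Q-value. Before the examples, here is a minimal standalone sketch (not taken from any project below; the CartPole environment, network layout, and hyperparameters are illustrative assumptions) showing the two usages that recur on this page: passing a fixed-eps policy to a DQNAgent, or wrapping EpsGreedyQPolicy in LinearAnnealedPolicy so eps decays during training.

import gym
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy

env = gym.make('CartPole-v0')
nb_actions = env.action_space.n

# A tiny Q-network: one Q-value output per action.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(16, activation='relu'),
    Dense(nb_actions, activation='linear'),
])

# Fixed exploration rate: 10% random actions.
policy = EpsGreedyQPolicy(eps=.1)
# Or anneal eps linearly from 1.0 to 0.1 over the first 10000 training steps.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=10000)

memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)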
Example #1
Source File: sarsa.py From keras-rl2 with MIT License
def __init__(self, model, nb_actions, policy=None, test_policy=None, gamma=.99, nb_steps_warmup=10,
             train_interval=1, delta_clip=np.inf, *args, **kwargs):
    super(SarsaAgent, self).__init__(*args, **kwargs)

    # Do not use defaults in constructor because that would mean that each instance shares the same
    # policy.
    if policy is None:
        policy = EpsGreedyQPolicy()
    if test_policy is None:
        test_policy = GreedyQPolicy()

    self.model = model
    self.nb_actions = nb_actions
    self.policy = policy
    self.test_policy = test_policy
    self.gamma = gamma
    self.nb_steps_warmup = nb_steps_warmup
    self.train_interval = train_interval
    self.delta_clip = delta_clip

    self.compiled = False
    self.actions = None
    self.observations = None
    self.rewards = None
Example #2
Source File: dqn_breakout_test.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    model = build_model(INPUT_SHAPE, num_actions)

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000000)

    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    # Load the weights that were saved after training.
    dqn.load_weights('dqn_BreakoutDeterministic-v4_weights_1750000.h5f')

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #3
Source File: test_discrete.py From keras-rl with MIT License
def test_duel_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False,
                   enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #4
Source File: test_discrete.py From keras-rl with MIT License
def test_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #5
Source File: sarsa.py From keras-rl with MIT License
def __init__(self, model, nb_actions, policy=None, test_policy=None, gamma=.99, nb_steps_warmup=10,
             train_interval=1, delta_clip=np.inf, *args, **kwargs):
    super(SarsaAgent, self).__init__(*args, **kwargs)

    # Do not use defaults in constructor because that would mean that each instance shares the same
    # policy.
    if policy is None:
        policy = EpsGreedyQPolicy()
    if test_policy is None:
        test_policy = GreedyQPolicy()

    self.model = model
    self.nb_actions = nb_actions
    self.policy = policy
    self.test_policy = test_policy
    self.gamma = gamma
    self.nb_steps_warmup = nb_steps_warmup
    self.train_interval = train_interval
    self.delta_clip = delta_clip

    self.compiled = False
    self.actions = None
    self.observations = None
    self.rewards = None
Example #6
Source File: test_discrete.py From keras-rl2 with MIT License
def test_duel_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False,
                   enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #7
Source File: test_discrete.py From keras-rl2 with MIT License
def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #8
Source File: test_discrete.py From keras-rl2 with MIT License
def test_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #9
Source File: test_discrete.py From keras-rl with MIT License
def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
Example #10
Source File: dqn_lunar_lander.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=0.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #11
Source File: dqn_breakout.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE

    model = build_model(INPUT_SHAPE, num_actions)

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000000)

    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=1750000, log_interval=10000, visualize=False, verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #12
Source File: dqn_cartpole.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'CartPole-v0'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #13
Source File: dqn_lunar_lander_test.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    # Load the weights that were saved after training.
    dqn.load_weights('dqn_LunarLander-v2_weights_510000.h5f')

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #14
Source File: dqn.py From keras-rl with MIT License
def __init__(self, model, policy=None, test_policy=None, enable_double_dqn=False,
             enable_dueling_network=False, dueling_type='avg', *args, **kwargs):
    super(DQNAgent, self).__init__(*args, **kwargs)

    # Validate (important) input.
    if hasattr(model.output, '__len__') and len(model.output) > 1:
        raise ValueError('Model "{}" has more than one output. DQN expects a model that has a single output.'.format(model))
    if model.output._keras_shape != (None, self.nb_actions):
        raise ValueError('Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'.format(model.output, self.nb_actions))

    # Parameters.
    self.enable_double_dqn = enable_double_dqn
    self.enable_dueling_network = enable_dueling_network
    self.dueling_type = dueling_type
    if self.enable_dueling_network:
        # Get the second-to-last layer of the model and discard the final layer.
        layer = model.layers[-2]
        nb_action = model.output._keras_shape[-1]

        # Layer y has shape (nb_action + 1,):
        #   y[:, 0]  represents V(s; theta)
        #   y[:, 1:] represents A(s, a; theta)
        y = Dense(nb_action + 1, activation='linear')(layer.output)

        # Calculate Q(s, a; theta):
        #   dueling_type == 'avg':   Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - Avg_a(A(s,a;theta)))
        #   dueling_type == 'max':   Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - max_a(A(s,a;theta)))
        #   dueling_type == 'naive': Q(s,a;theta) = V(s;theta) + A(s,a;theta)
        if self.dueling_type == 'avg':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], axis=1, keepdims=True),
                                 output_shape=(nb_action,))(y)
        elif self.dueling_type == 'max':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], axis=1, keepdims=True),
                                 output_shape=(nb_action,))(y)
        elif self.dueling_type == 'naive':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:],
                                 output_shape=(nb_action,))(y)
        else:
            assert False, "dueling_type must be one of {'avg','max','naive'}"

        model = Model(inputs=model.input, outputs=outputlayer)

    # Related objects.
    self.model = model
    if policy is None:
        policy = EpsGreedyQPolicy()
    if test_policy is None:
        test_policy = GreedyQPolicy()
    self.policy = policy
    self.test_policy = test_policy

    # State.
    self.reset_states()
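The dueling combination described in the comments above can be checked by hand. Below is a small numeric sketch of the 'avg' variant (the values are made up for illustration and are not part of keras-rl), where the first column of y is V(s) and the remaining columns are the advantages A(s, a):

import numpy as np

y = np.array([[2.0, 1.0, 3.0, 5.0]])             # V(s) = 2, A(s, a) = [1, 3, 5]
v, a = y[:, :1], y[:, 1:]
q_avg = v + (a - a.mean(axis=1, keepdims=True))  # 'avg' dueling combination
print(q_avg)                                     # [[0. 2. 4.]]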
Example #15
Source File: DQN_Agent_LSTM.py From Startcraft_pysc2_minigames with Apache License 2.0
def training_game():
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1, value_min=0.7,
                                  value_test=.0, nb_steps=1e6)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, enable_double_dqn=False,
                   nb_steps_warmup=500,
                   # nb_steps_warmup=1,
                   target_model_update=1e-2, policy=policy, batch_size=150, processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Tensorboard callback
    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True,
                                            write_images=False)

    # Save the parameters and upload them when needed
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=callbacks, nb_steps=1e7, action_repetition=2, log_interval=1e4, verbose=2)

    dqn.save_weights(w_file, overwrite=True)

    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
Example #16
Source File: DQN_Agent.py From Startcraft_pysc2_minigames with Apache License 2.0
def training_game():
    env = Environment(map_name="HallucinIce", visualize=True, game_steps_per_episode=150,
                      agent_interface_format=features.AgentInterfaceFormat(
                          feature_dimensions=features.Dimensions(screen=64, minimap=32)
                      ))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = _SIZE * _SIZE  # Should this be an integer?

    model = neural_network_model(input_shape, nb_actions)

    # memory: how many subsequent observations should be provided to the network?
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    ### Policy
    # The agent's behaviour function: how the agent picks actions.
    # LinearAnnealedPolicy is a wrapper that anneals an attribute of the wrapped policy linearly
    # over a number of steps (why is it rarely seen wrapping anything other than epsilon-greedy?).
    # EpsGreedyQPolicy selects a uniformly random action with probability eps, and otherwise the
    # action with the highest estimated Q-value.
    # BoltzmannQPolicy assumes a Boltzmann distribution, which gives the probability that a system
    # is in a certain state as a function of that state's energy; here, actions are sampled in
    # proportion to their exponentiated Q-values.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1, value_min=0.7,
                                  value_test=.0, nb_steps=1e6)
    # policy = BoltzmannQPolicy(tau=1., clip=(-500, 500))  # clip defined in between -500 / 500

    ### Agent
    # Deep Q-learning: combines Q-learning (the Bellman equation) with a deep neural network.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=500,
                   target_model_update=1e-2, policy=policy, batch_size=150, processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    ## Save the parameters and upload them when needed
    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)
    callbacks = [ModelIntervalCheckpoint(check_w_file, interval=1000)]
    callbacks += [FileLogger(log_file, interval=100)]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=callbacks, nb_steps=1e7, action_repetition=2, log_interval=1e4, verbose=2)

    dqn.save_weights(w_file, overwrite=True)

    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
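The policy comments in the example above contrast epsilon-greedy and Boltzmann selection. Here is a brief standalone sketch (the Q-values are hypothetical, chosen only to illustrate the two selection rules) of how each policy turns a Q-value vector into an action index:

import numpy as np
from rl.policy import EpsGreedyQPolicy, BoltzmannQPolicy

q_values = np.array([0.1, 0.5, 0.2])  # hypothetical Q-values for three actions

eps_greedy = EpsGreedyQPolicy(eps=0.3)                    # 30% random, otherwise argmax
boltzmann = BoltzmannQPolicy(tau=1., clip=(-500., 500.))  # sample from softmax(Q / tau)

print(eps_greedy.select_action(q_values))  # usually 1, occasionally a random action
print(boltzmann.select_action(q_values))   # 1 is most likely, but every action is possible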
Example #17
Source File: dqn.py From keras-rl2 with MIT License
def __init__(self, model, policy=None, test_policy=None, enable_double_dqn=False,
             enable_dueling_network=False, dueling_type='avg', *args, **kwargs):
    super(DQNAgent, self).__init__(*args, **kwargs)

    # Validate (important) input.
    if list(model.output.shape) != list((None, self.nb_actions)):
        raise ValueError('Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'.format(model.output, self.nb_actions))

    # Parameters.
    self.enable_double_dqn = enable_double_dqn
    self.enable_dueling_network = enable_dueling_network
    self.dueling_type = dueling_type
    if self.enable_dueling_network:
        # Get the second-to-last layer of the model and discard the final layer.
        layer = model.layers[-2]
        nb_action = model.output.shape[-1]

        # Layer y has shape (nb_action + 1,):
        #   y[:, 0]  represents V(s; theta)
        #   y[:, 1:] represents A(s, a; theta)
        y = Dense(nb_action + 1, activation='linear')(layer.output)

        # Calculate Q(s, a; theta):
        #   dueling_type == 'avg':   Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - Avg_a(A(s,a;theta)))
        #   dueling_type == 'max':   Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - max_a(A(s,a;theta)))
        #   dueling_type == 'naive': Q(s,a;theta) = V(s;theta) + A(s,a;theta)
        if self.dueling_type == 'avg':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], axis=1, keepdims=True),
                                 output_shape=(nb_action,))(y)
        elif self.dueling_type == 'max':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], axis=1, keepdims=True),
                                 output_shape=(nb_action,))(y)
        elif self.dueling_type == 'naive':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:],
                                 output_shape=(nb_action,))(y)
        else:
            assert False, "dueling_type must be one of {'avg','max','naive'}"

        model = Model(inputs=model.input, outputs=outputlayer)

    # Related objects.
    self.model = model
    if policy is None:
        policy = EpsGreedyQPolicy()
    if test_policy is None:
        test_policy = GreedyQPolicy()
    self.policy = policy
    self.test_policy = test_policy

    # State.
    self.reset_states()