Python baselines.common.atari_wrappers_deprecated.wrap_dqn() Examples

The following are 26 code examples of baselines.common.atari_wrappers_deprecated.wrap_dqn(), drawn from open-source projects. Each example notes its source file, project, and license. You may also want to check out all available functions and classes of the module baselines.common.atari_wrappers_deprecated.
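Most of the snippets below share the same setup: make a NoFrameskip Atari environment, pass it through wrap_dqn(), and optionally wrap the result in ScaledFloatFrame before training or evaluating a deepq agent. A minimal sketch of that pattern follows; it assumes gym and an old baselines checkout that still ships atari_wrappers_deprecated, and the (84, 84, 4) observation shape noted in the comments is the usual output of the DQN wrapper stack rather than a guarantee.

import gym
from baselines.common.atari_wrappers_deprecated import ScaledFloatFrame, wrap_dqn

env = gym.make("PongNoFrameskip-v4")   # NoFrameskip variant: wrap_dqn() applies its own frame skipping
env = ScaledFloatFrame(wrap_dqn(env))  # downsampled, stacked grayscale frames rescaled to [0, 1]

obs = env.reset()
print(env.observation_space.shape)     # typically (84, 84, 4)
env.close()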
Example #1
Source File: train_pong.py    From learning2run with MIT License
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close() 
Example #2
Source File: train_pong.py    From rl-attack-detection with MIT License
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close() 
Example #3
Source File: train_pong.py    From emdqn with MIT License
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close() 
Example #4
Source File: train.py    From BackpropThroughTheVoidRL with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = bench.Monitor(env, logger.get_dir())  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)  # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    return env, monitored_env 
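The wrapped environment is what the agent interacts with, while the Monitor handle underneath it records raw game rewards and episode lengths before reward clipping and life-based episode termination are applied, which is why both are returned. A hypothetical caller of make_env() above (the accessor name follows baselines' bench.Monitor and may differ across versions):

env, monitored_env = make_env("Pong")
# ... run training or evaluation on `env`, e.g. with deepq.learn(...) ...
print(monitored_env.get_episode_rewards()[-10:])  # raw, unclipped scores of the last ten episodes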
Example #5
Source File: dqn_atari.py    From ngraph-python with Apache License 2.0
def main():
    if False:
        # deterministic version 4 results in a frame skip of 4 and no repeat action probability
        environment = gym.make('BreakoutDeterministic-v4')
        environment = TerminateOnEndOfLifeWrapper(environment)
        environment = ReshapeWrapper(environment)
        environment = ClipRewardWrapper(environment)
        environment = RepeatWrapper(environment, frames=4)
    else:
        # use the environment wrappers found in openai baselines.
        environment = gym.make('BreakoutNoFrameskip-v4')
        environment = wrap_dqn(environment)
        environment = DimShuffleWrapper(environment)

    # todo: perhaps these should be defined in the environment itself
    state_axes = ng.make_axes([
        ng.make_axis(environment.observation_space.shape[0], name='C'),
        ng.make_axis(environment.observation_space.shape[1], name='H'),
        ng.make_axis(environment.observation_space.shape[2], name='W'),
    ])

    agent = dqn.Agent(
        state_axes,
        environment.action_space,
        model=model,
        epsilon=dqn.linear_generator(start=1.0, end=0.1, steps=1000000),
        gamma=0.99,
        learning_rate=0.00025,
        memory=dqn.Memory(maxlen=1000000),
        target_network_update_frequency=1000,
        learning_starts=10000,
    )

    rl_loop.rl_loop_train(environment, agent, episodes=200000) 
Example #6
Source File: enjoy.py    From BackpropThroughTheVoidRL with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = bench.Monitor(env, None)
    env = wrap_dqn(env)
    return env 
Example #7
Source File: wang2015_eval.py    From BackpropThroughTheVoidRL with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = bench.Monitor(env, None)
    env = wrap_dqn(env_monitored)
    return env_monitored, env 
Example #8
Source File: enjoy_pong.py    From emdqn with MIT License
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    act = deepq.load("pong_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew) 
Example #9
Source File: train.py    From emdqn with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = SimpleMonitor(env)  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)  # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    return env, monitored_env 
Example #10
Source File: enjoy.py    From emdqn with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = SimpleMonitor(env)
    env = wrap_dqn(env)
    return env 
Example #11
Source File: wang2015_eval.py    From emdqn with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = SimpleMonitor(env)
    env = wrap_dqn(env_monitored)
    return env_monitored, env 
Example #12
Source File: wang2015_eval.py    From distributional-dqn with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = SimpleMonitor(env)
    env = wrap_dqn(env_monitored)
    return env_monitored, env 
Example #13
Source File: enjoy.py    From deeprl-baselines with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = bench.Monitor(env, None)
    env = wrap_dqn(env)
    return env 
Example #14
Source File: rainbow.py    From deeprl-baselines with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = bench.SimpleMonitor(env)  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)
    return env, monitored_env 
Example #15
Source File: train.py    From deeprl-baselines with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = bench.SimpleMonitor(env, logger.get_dir())  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)  # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    return env, monitored_env 
Example #16
Source File: enjoy.py    From deeprl-baselines with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = bench.Monitor(env, None)
    env = wrap_dqn(env)
    return env 
Example #17
Source File: wang2015_eval.py    From deeprl-baselines with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = bench.Monitor(env, None)
    env = wrap_dqn(env_monitored)
    return env_monitored, env 
Example #18
Source File: enjoy_pong.py    From rl-attack-detection with MIT License
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    act = deepq.load("pong_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew) 
Example #19
Source File: train.py    From rl-attack-detection with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = SimpleMonitor(env)  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)  # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    return env, monitored_env 
Example #20
Source File: enjoy.py    From rl-attack-detection with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = SimpleMonitor(env)
    env = wrap_dqn(env)
    return env 
Example #21
Source File: wang2015_eval.py    From rl-attack-detection with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = SimpleMonitor(env)
    env = wrap_dqn(env_monitored)
    return env_monitored, env 
Example #22
Source File: train.py    From NoisyNet-DQN with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = SimpleMonitor(env)  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)  # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    return env, monitored_env 
Example #23
Source File: enjoy_pong.py    From learning2run with MIT License
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    act = deepq.load("pong_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew) 
Example #24
Source File: train.py    From learning2run with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = SimpleMonitor(env)  # puts rewards and number of steps in info before the environment is wrapped
    env = wrap_dqn(monitored_env)  # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    return env, monitored_env 
Example #25
Source File: enjoy.py    From learning2run with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = SimpleMonitor(env)
    env = wrap_dqn(env)
    return env 
Example #26
Source File: wang2015_eval.py    From learning2run with MIT License
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = SimpleMonitor(env)
    env = wrap_dqn(env_monitored)
    return env_monitored, env