Python rl.core.Processor() Examples

The following are 3 code examples of rl.core.Processor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rl.core, or try the search function.
Example #1
Source File: agent_keras_rl_dqn.py    From neuron_poker with MIT License 5 votes vote down vote up
def play(self, nb_episodes=5, render=False):
        """Let the agent play.

        Builds a ``DQNAgent`` around ``self.model``, compiles it and runs it in
        test mode on ``self.env``. The agent is stored on ``self.dqn``.

        Args:
            nb_episodes: Forwarded to ``DQNAgent.test`` as ``nb_episodes``.
            render: Forwarded to ``DQNAgent.test`` as ``visualize``.
        """
        memory = SequentialMemory(limit=memory_limit, window_length=window_length)
        policy = TrumpPolicy()

        class CustomProcessor(Processor):  # pylint: disable=redefined-outer-name
            """The agent and the environment"""

            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                """Reduce the info dict to its ``'player_data'`` entry.

                If that entry contains a ``'stack'`` key it is replaced by the
                placeholder ``{'x': 1}``.
                NOTE(review): the reason for the placeholder is not visible here;
                presumably the raw player data is not usable by the agent - confirm.
                """
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = self.env.action_space.n

        self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2, policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size, train_interval=train_interval, enable_double_dqn=enable_double_dqn)
        # `learning_rate` is the supported keyword; the legacy `lr` alias is
        # deprecated and rejected by newer Keras optimizers.
        self.dqn.compile(tf.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])  # pylint: disable=no-member

        self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
Example #2
Source File: test_core.py    From keras-rl2 with MIT License 4 votes vote down vote up
def test_copy_observations():
    """Check that the processor receives copies of the env observations.

    For each of the agent's ``fit`` and ``test`` methods, every observation
    passed to ``Processor.process_step`` must be equal in value to the one the
    environment returned from ``step``, yet be a different object.
    """
    for method_name in ('fit', 'test'):
        emitted = []   # observations exactly as returned by LocalEnv.step
        received = []  # observations as handed to the processor

        class LocalEnv(Env):
            def __init__(self):
                super(LocalEnv, self).__init__()

            def reset(self):
                self.state = 1
                return np.array(self.state)

            def step(self, action):
                self.state += 1
                observation = np.array(self.state)
                emitted.append(observation)
                # The episode is flagged done once the counter reaches 6.
                return observation, float(self.state) / 10., self.state >= 6, {}

            def seed(self, seed=None):
                pass

            def configure(self, *args, **kwargs):
                pass

        # Slight abuse of the processor for test purposes: it only records
        # what it is given and passes everything through unchanged.
        class LocalProcessor(Processor):
            def process_step(self, observation, reward, done, info):
                received.append(observation)
                return observation, reward, done, info

        agent = TestAgent(SequentialMemory(100, window_length=1), processor=LocalProcessor())
        env = LocalEnv()
        agent.compile()
        run = getattr(agent, method_name)
        run(env, 20, verbose=0, visualize=False)

        assert len(received) == len(emitted)
        assert_allclose(np.array(received), np.array(emitted))
        # Same values, but never the very same array object.
        assert all(sent is not seen for sent, seen in zip(emitted, received))
Example #3
Source File: test_core.py    From keras-rl with MIT License 4 votes vote down vote up
def test_copy_observations():
    """Verify the agent copies observations before giving them to the processor.

    Runs the agent's ``fit`` and ``test`` entry points against a tiny counting
    environment and compares what the environment returned from ``step`` with
    what ``Processor.process_step`` was called with: equal values, distinct
    objects.
    """
    for entry_point in ('fit', 'test'):
        env_outputs = []

        class LocalEnv(Env):
            def __init__(self):
                super(LocalEnv, self).__init__()

            def step(self, action):
                self.state += 1
                current = np.array(self.state)
                env_outputs.append(current)
                finished = self.state >= 6
                return current, float(self.state) / 10., finished, {}

            def reset(self):
                self.state = 1
                return np.array(self.state)

            def seed(self, seed=None):
                pass

            def configure(self, *args, **kwargs):
                pass

        # Slight abuse of the processor for test purposes.
        processor_inputs = []

        class LocalProcessor(Processor):
            def process_step(self, observation, reward, done, info):
                # Record the observation and pass the step through untouched.
                processor_inputs.append(observation)
                return observation, reward, done, info

        agent = TestAgent(
            SequentialMemory(100, window_length=1),
            processor=LocalProcessor(),
        )
        env = LocalEnv()
        agent.compile()
        getattr(agent, entry_point)(env, 20, verbose=0, visualize=False)

        assert len(processor_inputs) == len(env_outputs)
        assert_allclose(np.array(processor_inputs), np.array(env_outputs))
        distinct = [out is not seen for out, seen in zip(env_outputs, processor_inputs)]
        assert np.all(distinct)