Python agent.Agent() Examples

The following are 15 code examples of agent.Agent(), drawn from several open-source projects. The originating project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the agent module.
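Most of the examples below follow the same basic pattern: construct an Agent with its runtime dependencies (a TensorFlow session, an environment name, a configuration object, and so on), then call a method such as run(), play() or test() to drive it. A minimal sketch of that pattern, with a hypothetical config argument since the constructor signature varies by project:

from agent import Agent

agent = Agent(config)  # constructor arguments vary by project
agent.run()            # or play()/test(), depending on the entry point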
Example #1
Source File: play.py    From D4PG with MIT License
def play():
    # Set random seeds for reproducibility
    np.random.seed(play_params.RANDOM_SEED)
    tf.set_random_seed(play_params.RANDOM_SEED)
         
    # Create session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)     
            
    # Initialise agent
    agent = Agent(sess, play_params.ENV, play_params.RANDOM_SEED)
    # Build network
    agent.build_network(training=False)
    
    # Run network in environment
    agent.play()
    
    sess.close() 
Example #2
Source File: test.py    From D4PG with MIT License
def test():
    # Set random seeds for reproducibility
    np.random.seed(test_params.RANDOM_SEED)
    tf.set_random_seed(test_params.RANDOM_SEED)
         
    # Create session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)     
            
    # Initialise agent
    agent = Agent(sess, test_params.ENV, test_params.RANDOM_SEED)
    # Build network
    agent.build_network(training=False)
    
    # Test network
    agent.test()
    
    sess.close() 
Example #3
Source File: main.py    From voltha with Apache License 2.0
def main():

    args = parse_options()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    store = ObjectStore()
    backend = MockBackend(store, in_out_iface=args.in_out_iface,
                          in_out_stag=None if args.in_out_stag is None else int(args.in_out_stag))
    agent = Agent(args.controller, int(args.datapath_id), store, backend)
    store.set_agent(agent)
    backend.set_agent(agent)

    try:
        agent.run()
    except KeyboardInterrupt:
        logging.info("Ctrl-c received! Shutting down connection and exiting...")
        agent.stop()
        backend.stop() 
Example #4
Source File: main.py    From voltha with Apache License 2.0
def parse_options():
    parser = ArgumentParser("pyofagent - Python-based Open Flow Agent")
    parser.add_argument("-c", "--controller",
                        help="Controller host:port to connect to", metavar="HOST:PORT",
                        default="localhost:6633")
    parser.add_argument("-d", "--devid", dest="datapath_id",
                        help="Device identifier", metavar="DEVID",
                        default=42)
    parser.add_argument("-v", "--verbose", action='store_true',
                        help="enable verbose logging (log-level is DEBUG)")
    parser.add_argument("-I", "--in-out-iface", metavar="IN-OUT-IFACE",
                        help="Local interface to receive/send in-out frames")
    parser.add_argument("-S", "--in-out-stag", metavar="IN-OUT-STAG",
                        help="Expect/Apply given s-tag when receiving/sending frames "
                             "at the in-out interface")
    return parser.parse_args() 
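Given the options defined above, a plausible command-line invocation of the agent would look like the following (the controller address, datapath id, interface name, and s-tag values are illustrative):

python main.py --controller localhost:6633 --devid 42 --verbose \
               --in-out-iface eth1 --in-out-stag 101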
Example #5
Source File: main.py    From TFSegmentation with Apache License 2.0
def main():
    args = get_params()
    args = create_exp_dirs(args)
    agent = Agent(args)
    agent.run() 
Example #6
Source File: connection_mgr.py    From voltha with Apache License 2.0
def create_agent(self, device):
        datapath_id = device.datapath_id
        device_id = device.id
        for controller_endpoint in self.controller_endpoints:
            agent = Agent(controller_endpoint, datapath_id,
                          device_id, self.grpc_client, self.enable_tls,
                          self.key_file, self.cert_file)
            agent.start()
            self.agent_map[(datapath_id, controller_endpoint)] = agent
            self.device_id_to_datapath_id_map[device_id] = datapath_id 
Example #7
Source File: connection_mgr.py    From voltha with Apache License 2.0
def __init__(self, consul_endpoint, vcore_endpoint, vcore_grpc_timeout,
                 controller_endpoints, instance_id,
                 enable_tls=False, key_file=None, cert_file=None,
                 vcore_retry_interval=0.5, devices_refresh_interval=5,
                 subscription_refresh_interval=5):

        self.log = get_logger()
        self.log.info('init-connection-manager')
        self.log.info('list-of-controllers',
                      controller_endpoints=controller_endpoints)

        self.controller_endpoints = controller_endpoints
        self.consul_endpoint = consul_endpoint
        self.vcore_endpoint = vcore_endpoint
        self.grpc_timeout = vcore_grpc_timeout
        self.instance_id = instance_id
        self.enable_tls = enable_tls
        self.key_file = key_file
        self.cert_file = cert_file

        self.channel = None
        self.grpc_client = None  # single, shared gRPC client to vcore

        self.agent_map = {}  # (datapath_id, controller_endpoint) -> Agent()
        self.device_id_to_datapath_id_map = {}

        self.vcore_retry_interval = vcore_retry_interval
        self.devices_refresh_interval = devices_refresh_interval
        self.subscription_refresh_interval = subscription_refresh_interval
        self.subscription = None

        self.running = False 
Example #8
Source File: maze_environment.py    From Hands-on-Neuroevolution-with-Python with MIT License
def read_environment(file_path):
    """
    The function to read maze environment configuration from provided
    file.
    Arguments:
        file_path: The path to the file to read maze configuration from.
    Returns:
        The initialized maze environment.
    """
    num_lines, index = -1, 0
    walls = []
    maze_agent, maze_exit = None, None
    with open(file_path, 'r') as file:
        for line in file.readlines():
            line = line.strip()
            if len(line) == 0:
                # skip empty lines
                continue

            if index == 0:
                # read the number of line segments
                num_lines = int(line)
            elif index == 1:
                # read the agent's position
                loc = geometry.read_point(line)
                maze_agent = agent.Agent(location=loc)
            elif index == 2:
                # read the agent's initial heading
                maze_agent.heading = float(line)
            elif index == 3:
                # read the maze exit location
                maze_exit = geometry.read_point(line)
            else:
                # read the walls
                wall = geometry.read_line(line)
                walls.append(wall)

            # increment cursor
            index += 1

    assert len(walls) == num_lines

    print("Maze environment configured successfully from the file: %s" % file_path)
    # create and return the maze environment
    return MazeEnvironment(agent=maze_agent, walls=walls, exit_point=maze_exit) 
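Based on the parsing logic above, a maze configuration file holds the wall count, the agent's start point, its initial heading, the exit point, and then one wall per line. Assuming geometry.read_point and geometry.read_line accept whitespace-separated coordinates (the exact syntax depends on the geometry module), an illustrative file for a maze with two walls could look like:

2
30 22
180.0
270 100
5 5 295 5
5 195 295 195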
Example #9
Source File: train.py    From D4PG with MIT License
def train():
    
    tf.reset_default_graph()
    
    # Set random seeds for reproducibility
    np.random.seed(train_params.RANDOM_SEED)
    random.seed(train_params.RANDOM_SEED)
    tf.set_random_seed(train_params.RANDOM_SEED)
    
    # Initialise prioritised experience replay memory
    PER_memory = PrioritizedReplayBuffer(train_params.REPLAY_MEM_SIZE, train_params.PRIORITY_ALPHA)
    # Initialise Gaussian noise generator
    gaussian_noise = GaussianNoiseGenerator(train_params.ACTION_DIMS, train_params.ACTION_BOUND_LOW, train_params.ACTION_BOUND_HIGH, train_params.NOISE_SCALE)
            
    # Create session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)  
    
    # Create threads for learner process and agent processes       
    threads = []
    # Create threading events for communication and synchronisation between the learner and agent threads
    run_agent_event = threading.Event()
    stop_agent_event = threading.Event()
    
    # with tf.device('/device:GPU:0'):
    # Initialise learner
    learner = Learner(sess, PER_memory, run_agent_event, stop_agent_event)
    # Build learner networks
    learner.build_network()
    # Build ops to update target networks
    learner.build_update_ops()
    # Initialise variables (either from ckpt file if given, or from random)
    learner.initialise_vars()
    # Get learner policy (actor) network params - agent needs these to copy latest policy params periodically
    learner_policy_params = learner.actor_net.network_params + learner.actor_net.bn_params
    
    threads.append(threading.Thread(target=learner.run))
    
    
    for n_agent in range(train_params.NUM_AGENTS):
        # Initialise agent
        agent = Agent(sess, train_params.ENV, train_params.RANDOM_SEED, n_agent)
        # Build network
        agent.build_network(training=True)
        # Build op to periodically update agent network params from learner network
        agent.build_update_op(learner_policy_params)
        # Create Tensorboard summaries to save episode rewards
        if train_params.LOG_DIR is not None:
            agent.build_summaries(train_params.LOG_DIR + ('/agent_%02d' % n_agent))
 
        threads.append(threading.Thread(target=agent.run, args=(PER_memory, gaussian_noise, run_agent_event, stop_agent_event)))
    
    for t in threads:
        t.start()
        
    for t in threads:
        t.join()
    
    sess.close() 
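The learner and agent threads above coordinate through the two threading.Event objects: run_agent_event gates whether agents may collect experience, and stop_agent_event signals shutdown. A self-contained sketch of just that signalling pattern (the worker body is a stand-in, not the project's Agent.run):

import threading
import time

run_agent_event = threading.Event()
stop_agent_event = threading.Event()

def agent_loop():
    # Stand-in for Agent.run(): step while allowed, exit when stopped
    while not stop_agent_event.is_set():
        if run_agent_event.wait(timeout=0.1):
            pass  # one step of experience collection would go here

t = threading.Thread(target=agent_loop)
t.start()
run_agent_event.set()    # learner allows agents to run
time.sleep(0.5)          # ... training would happen here ...
stop_agent_event.set()   # learner signals shutdown
t.join()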
Example #10
Source File: maze_environment.py    From Hands-on-Neuroevolution-with-Python with MIT License
def update(self, control_signals):
        """
        The function to update solver agent position within maze. After agent position
        updated it will be checked to find out if maze exit was reached afetr that.
        Arguments:
            control_signals: The control signals received from the control ANN
        Returns:
            The True if maze exit was found after update or maze exit was already
            found in previous simulation cycles.
        """
        if self.exit_found:
            # Maze exit already found
            return True

        # Apply control signals
        self.apply_control_signals(control_signals)

        # get X and Y velocity components
        vx = math.cos(geometry.deg_to_rad(self.agent.heading)) * self.agent.speed
        vy = math.sin(geometry.deg_to_rad(self.agent.heading)) * self.agent.speed

        # Update current Agent's heading (we consider the simulation time step size equal to 1s
        # and the angular velocity as degrees per second)
        self.agent.heading += self.agent.angular_vel

        # Wrap the heading to stay within the [0, 360] degree range
        if self.agent.heading > 360:
            self.agent.heading -= 360
        elif self.agent.heading < 0:
            self.agent.heading += 360

        # find the next location of the agent
        new_loc = geometry.Point(
            x = self.agent.location.x + vx, 
            y = self.agent.location.y + vy
        )

        if not self.test_wall_collision(new_loc):
            self.agent.location = new_loc

        # update agent's sensors
        self.update_rangefinder_sensors()
        self.update_radars()

        # check if agent reached exit point
        distance = self.agent_distance_to_exit()
        self.exit_found = (distance < self.exit_range)
        return self.exit_found 
Example #11
Source File: agent_nl_rule_soft.py    From KB-InfoBot with MIT License
def next(self, user_action, verbose=False):
        self._update_state(user_action['nl_sentence'], upd=self.upd, verbose=verbose)
        self.state['turn'] += 1

        act = {}
        act['diaact'] = 'UNK'
        act['request_slots'] = {}
        act['target'] = []

        db_probs = self._check_db()
        H_db = tools.entropy_p(db_probs)
        H_slots = calc_entropies(self.state['inform_slots'], db_probs, self.state['database'])
        if verbose:
            print('Agent DB entropy = ', H_db)
            print('Agent slot belief entropies - ')
            print(' '.join(['%s:%.2f' % (k, v) for k, v in H_slots.items()]))

        if H_db < self.tr:
            # agent reasonably confident, inform
            act['diaact'] = 'inform'
            act['target'] = self._inform(db_probs)
        else:
            sorted_entropies = sorted(H_slots.items(), key=operator.itemgetter(1), reverse=True)
            req = False
            for (s,h) in sorted_entropies:
                if H_slots[s]<self.frac*self.state['init_entropy'][s] or H_slots[s]<self.ts or \
                        self.state['num_requests'][s] >= self.max_req:
                    continue
                act['diaact'] = 'request'
                act['request_slots'][s] = 'UNK'
                self.state['prevact'] = 'request@%s' %s
                self.state['num_requests'][s] += 1
                req = True
                break
            if not req:
                # agent confident about all slots, inform
                act['diaact'] = 'inform'
                act['target'] = self._inform(db_probs)
                self.state['prevact'] = 'inform@inform'

        act['probs'] = [np.concatenate([self.state['inform_slots'][s]/self.state['inform_slots'][s].sum(), \
                np.asarray([float(self.state['database'].inv_counts[s][-1])/self.state['database'].N])]) \
                for s in dialog_config.inform_slots]
        act['phis'] = [1. if s in self.state['dont_care'] else 0. for s in dialog_config.inform_slots]
        act['posterior'] = db_probs
        return act 
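The tools.entropy_p helper is not shown on this page; given how it is applied to probability vectors above, a plausible implementation is the Shannon entropy of a distribution (an assumption, not the project's actual code):

import numpy as np

def entropy_p(p):
    # Shannon entropy of a probability vector, ignoring zero entries
    p = np.asarray(p, dtype=float)
    p = p[p > 0]
    return float(-np.sum(p * np.log(p)))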
Example #12
Source File: my_env.py    From RL-Surgical-Gesture-Segmentation with MIT License
def _get_state(self):

        if self.position >= self.episode_len:
            raise Exception('Agent out of environment')

        state = []

        if self.mode == 'full':

            state.append(self.feature[self.position])
            for g in self.glimpse:
                if self.position + g < self.episode_len:
                    state.append(self.feature[self.position + g])
                else:
                    state.append(np.zeros(self.feature_num))
                    
            state.append(self.agent.get_state_vector())
            state.append(self.agent.get_hints_vector())

        elif self.mode == 'no_tcn':

            state.append(self.agent.get_state_vector())
            state.append(self.agent.get_hints_vector())

        elif self.mode == 'no_future':

            state.append(self.feature[self.position])

            state.append(self.agent.get_state_vector())
            state.append(self.agent.get_hints_vector())

        elif self.mode == 'no_hint':

            state.append(self.feature[self.position])
            for g in self.glimpse:
                if self.position + g < self.episode_len:
                    state.append(self.feature[self.position + g])
                else:
                    state.append(np.zeros(self.feature_num))

        else:
            raise Exception('Invalid Env Mode!')

        state = np.concatenate(state)
        return state 
Example #13
Source File: my_env.py    From RL-Surgical-Gesture-Segmentation with MIT License
def __init__(self,
                 dataset, 
                 statistical_model,
                 class_num,
                 feature_num,
                 k_steps,
                 glimpse,
                 reward_alpha,
                 mode):  # glimpse values should be > 0

        self.dataset = dataset
        self.k_steps = k_steps
        self.glimpse = glimpse
        self.reward_alpha = reward_alpha

        self.class_num = class_num
        self.feature_num = feature_num

        self.agent = Agent(name='CleverChang',
                           state_num=self.class_num,
                           **statistical_model)

        self.action_num = len(self.k_steps) * self.class_num
        self.action_space = spaces.Discrete(self.action_num)

        self.mode = mode

        if self.mode == 'full':
            self.observation_num = self.feature_num * (len(self.glimpse)+1) + \
                                                        2 * self.class_num
        elif self.mode == 'no_tcn':
            self.observation_num = 2 * self.class_num
        elif self.mode == 'no_future':
            self.observation_num = self.feature_num + 2 * self.class_num
        elif self.mode == 'no_hint':
            self.observation_num = self.feature_num * (len(self.glimpse)+1)
        else:
            raise Exception('Invalid Env Mode!')

        bounds = np.ones(self.observation_num) * np.inf             # To be improved
        self.observation_space = spaces.Box(-bounds, bounds)

        self.state = None 
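As a concrete check of the observation arithmetic above (with illustrative values feature_num=128, glimpse=[1, 2, 3], class_num=10): mode 'full' gives 128 * (3 + 1) + 2 * 10 = 532 dimensions, 'no_future' gives 128 + 20 = 148, and 'no_tcn' gives only 20.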
Example #14
Source File: agent_nl_rule_hard.py    From KB-InfoBot with MIT License
def next(self, user_action, verbose=False):
        self._update_state(user_action['nl_sentence'], upd=self.upd, verbose=verbose)
        self.state['turn'] += 1

        act = {}
        act['diaact'] = 'UNK'
        act['request_slots'] = {}
        act['target'] = []

        db_status, db_index = self._check_db()
        H_slots = {}
        for s in dialog_config.inform_slots:
            s_p = self.state['inform_slots'][s]/self.state['inform_slots'][s].sum()
            H_slots[s] = tools.entropy_p(s_p)
        sorted_entropies = sorted(H_slots.items(), key=operator.itemgetter(1), reverse=True)
        if verbose:
            print('Agent slot belief entropies - ')
            print(' '.join(['%s:%.2f' % (k, v) for k, v in H_slots.items()]))

        if not db_status:
            # no match, some error, re-ask some slot
            act['diaact'] = 'request'
            request_slot = random.choice(list(self.state['inform_slots'].keys()))
            act['request_slots'][request_slot] = 'UNK'
            self.state['prevact'] = 'request@%s' %request_slot
            self.state['num_requests'][request_slot] += 1
        elif len(db_status)==1:
            act['diaact'] = 'inform'
            act['target'] = self._inform(db_index)
            self.state['prevact'] = 'inform@inform'
        else:
            req = False
            for (s,h) in sorted_entropies:
                if H_slots[s]<self.frac*self.state['init_entropy'][s] or H_slots[s]<self.ts or \
                        self.state['num_requests'][s] >= self.max_req:
                    continue
                act['diaact'] = 'request'
                act['request_slots'][s] = 'UNK'
                self.state['prevact'] = 'request@%s' %s
                self.state['num_requests'][s] += 1
                req = True
                break
            if not req:
                # agent confident about all slots, inform
                act['diaact'] = 'inform'
                act['target'] = self._inform(db_index)
                self.state['prevact'] = 'inform@inform'

        act['posterior'] = np.zeros((len(self.database.labels),))
        act['posterior'][db_index] = 1./len(db_index)

        return act 
Example #15
Source File: agent_nl_rule_no.py    From KB-InfoBot with MIT License
def next(self, user_action, verbose=False):
        self._update_state(user_action['nl_sentence'], upd=self.upd, verbose=verbose)
        self.state['turn'] += 1

        act = {}
        act['diaact'] = 'UNK'
        act['request_slots'] = {}
        act['target'] = []

        db_probs = self._check_db()
        H_slots = {}
        for s in dialog_config.inform_slots:
            s_p = self.state['inform_slots'][s]/self.state['inform_slots'][s].sum()
            H_slots[s] = tools.entropy_p(s_p)
        if verbose:
            print('Agent slot belief entropies - ')
            print(' '.join(['%s:%.2f' % (k, v) for k, v in H_slots.items()]))

        sorted_entropies = sorted(H_slots.items(), key=operator.itemgetter(1), reverse=True)
        req = False
        for (s,h) in sorted_entropies:
            if H_slots[s]<self.frac*self.state['init_entropy'][s] or H_slots[s]<self.ts or \
                    self.state['num_requests'][s] >= self.max_req:
                continue
            act['diaact'] = 'request'
            act['request_slots'][s] = 'UNK'
            self.state['prevact'] = 'request@%s' %s
            self.state['num_requests'][s] += 1
            req = True
            break
        if not req:
            # agent confident about all slots, inform
            act['diaact'] = 'inform'
            act['target'] = self._inform(db_probs)
            self.state['prevact'] = 'inform@inform'

        act['probs'] = [np.concatenate([self.state['inform_slots'][s]/ \
                self.state['inform_slots'][s].sum(), \
                np.asarray([float(self.state['database'].inv_counts[s][-1])/ \
                self.state['database'].N])]) \
                for s in dialog_config.inform_slots]
        act['phis'] = [1. if s in self.state['dont_care'] else 0. for s in dialog_config.inform_slots]
        act['posterior'] = db_probs
        return act