Python baselines.common.tf_util.huber_loss() Examples
The following is 1 code example of baselines.common.tf_util.huber_loss().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module baselines.common.tf_util, or try the search function.
Example #1
Source File: q_map_dqn_agent.py From qmap with MIT License | 4 votes |
def qmap_build_train(observation_space, coords_shape, model, n_actions, optimizer,
                     grad_norm_clip, scope='q_map'):
    """Build the training graph for a Q-map network and return its callables.

    Two networks are instantiated with ``model`` inside ``scope``: ``q_func``,
    whose variables are optimized, and ``target_q_func``, whose variables are
    only written by the returned ``update_target`` callable.

    The per-sample loss is the mean squared TD error over axes 1 and 2,
    multiplied by the per-sample ``weights`` and averaged over the batch.

    Args:
        observation_space: object whose ``.shape`` is the per-sample
            observation shape.
        coords_shape: per-sample shape of the target Q-value placeholder.
        model: callable invoked as ``model(inpt=..., n_actions=..., scope=...)``
            that returns the Q-value tensor. Its output is multiplied by a
            ``(batch, 1, 1, n_actions)`` mask and summed over axis 3, so the
            action dimension must be axis 3.
        n_actions: number of discrete actions (depth of the one-hot mask).
        optimizer: optimizer exposing ``compute_gradients``,
            ``apply_gradients`` and ``minimize``.
        grad_norm_clip: when not ``None``, each gradient is clipped with
            ``tf.clip_by_norm`` to this value before being applied.
        scope: name of the variable scope that wraps the graph.

    Returns:
        A tuple ``(compute_q_values, compute_double_q_values, train,
        update_target, train_debug)`` of ``U.function`` callables.
        ``train`` returns the mean absolute TD error per sample;
        ``train_debug`` additionally returns the weighted loss, the norms of
        the (clipped) gradients and the trainable variables under ``scope``.
        The gradient-norm list is empty when ``grad_norm_clip`` is ``None``.
    """
    with tf.variable_scope(scope):
        ob_shape = observation_space.shape
        observations = tf.placeholder(tf.float32, [None] + list(ob_shape), name='observations')
        actions = tf.placeholder(tf.int32, [None], name='actions')
        target_qs = tf.placeholder(tf.float32, [None] + list(coords_shape), name='targets')
        weights = tf.placeholder(tf.float32, [None], name='weights')

        # Online network and the variables the optimizer is allowed to touch.
        q_values = model(inpt=observations, n_actions=n_actions, scope='q_func')
        q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/q_func")

        # Target network, fed with the same observations placeholder.
        target_q_values = model(inpt=observations, n_actions=n_actions, scope='target_q_func')
        target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # One-hot over actions, reshaped to (batch, 1, 1, n_actions) so that it
        # broadcasts against q_values and keeps only the taken action.
        action_masks = tf.expand_dims(
            tf.expand_dims(tf.one_hot(actions, n_actions), axis=1), axis=1)
        qs_selected = tf.reduce_sum(q_values * action_masks, 3)

        td_errors = 1 * (qs_selected - target_qs)  # TODO: coefficient?
        losses = tf.reduce_mean(tf.square(td_errors), [1, 2])  # TODO: find best, was U.huber_loss
        weighted_loss = tf.reduce_mean(weights * losses)

        if grad_norm_clip is not None:
            gradients = optimizer.compute_gradients(weighted_loss, var_list=q_func_vars)
            # Clip each gradient individually; entries without a gradient are
            # passed through untouched.
            gradients = [
                (tf.clip_by_norm(grad, grad_norm_clip), var) if grad is not None else (grad, var)
                for grad, var in gradients
            ]
            optimize = optimizer.apply_gradients(gradients)
            # `gradients` holds (gradient, variable) pairs: take the norm of
            # the gradient only, and skip variables that have no gradient.
            grad_norms = [tf.norm(grad) for grad, _ in gradients if grad is not None]
        else:
            optimize = optimizer.minimize(weighted_loss, var_list=q_func_vars)
            # No per-variable gradients are exposed on this path. Use an empty
            # list rather than None so it is still a valid entry in the
            # train_debug outputs.
            grad_norms = []

        # Copy every online variable into its target counterpart; both lists
        # are sorted by name so the variables pair up.
        update_target_expr = tf.group(*[
            var_target.assign(var)
            for var, var_target in zip(
                sorted(q_func_vars, key=lambda v: v.name),
                sorted(target_q_func_vars, key=lambda v: v.name))
        ])

        errors = tf.reduce_mean(tf.abs(td_errors), [1, 2])  # TODO: try with the losses directly

        compute_q_values = U.function(inputs=[observations], outputs=q_values)
        compute_double_q_values = U.function(
            inputs=[observations], outputs=[q_values, target_q_values])
        train = U.function(
            inputs=[observations, actions, target_qs, weights],
            outputs=errors,
            updates=[optimize])
        update_target = U.function([], [], updates=[update_target_expr])

        # NOTE(review): this filters on the bare `scope` argument, whereas the
        # q_func/target_q_func lookups above use the current variable scope
        # name; the two differ if this is called inside an outer scope.
        trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
        train_debug = U.function(
            inputs=[observations, actions, target_qs, weights],
            outputs=[errors, weighted_loss, grad_norms, trainable_vars],
            updates=[optimize])

        return compute_q_values, compute_double_q_values, train, update_target, train_debug