Python tensorflow.clip_by_norm() Examples
The following are 30 code examples of tensorflow.clip_by_norm(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
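Before the examples, here is a minimal sketch of what tf.clip_by_norm() computes (the tensor values are illustrative only, not taken from any project below): it rescales a tensor t so that its L2 norm is at most clip_norm, i.e. t * clip_norm / max(||t||, clip_norm).

# Minimal sketch of tf.clip_by_norm semantics (TF 2.x eager mode;
# the values are illustrative only).
import tensorflow as tf

t = tf.constant([-3.0, 0.0, 4.0])            # L2 norm = 5.0
clipped = tf.clip_by_norm(t, clip_norm=4.0)  # rescaled by 4/5
print(clipped.numpy())                       # -> [-2.4  0.   3.2]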
Example #1
Source File: graph_builder.py From DOTA_models with Apache License 2.0 | 9 votes |
def _clip_gradients(self, grad):
  """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

  Sparse tensors, in the form of IndexedSlices returned for the
  gradients of embeddings, require special handling.

  Args:
    grad: Gradient Tensor, IndexedSlices, or None.

  Returns:
    Optionally clipped gradient.
  """
  if grad is not None and self.hyperparams.gradient_clip_norm > 0:
    logging.info('Clipping gradient %s', grad)
    if isinstance(grad, tf.IndexedSlices):
      tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
      return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
    else:
      return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
  else:
    return grad
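The IndexedSlices branch above matters because tf.clip_by_norm expects a dense tensor; sparse embedding gradients must have their .values clipped and then be rewrapped. A small sketch of the same pattern (the shapes here are made up for illustration):

import tensorflow as tf

# Hypothetical sparse gradient of the kind produced for embedding lookups:
sparse_grad = tf.IndexedSlices(values=tf.constant([[3.0, 4.0]]),
                               indices=tf.constant([1]),
                               dense_shape=tf.constant([10, 2]))
# Clip the dense .values and rewrap, mirroring _clip_gradients above:
clipped = tf.IndexedSlices(tf.clip_by_norm(sparse_grad.values, 1.0),
                           sparse_grad.indices, sparse_grad.dense_shape)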
Example #2
Source File: graph_builder.py From yolo_v2 with Apache License 2.0 | 6 votes |
def _clip_gradients(self, grad):
  """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

  Sparse tensors, in the form of IndexedSlices returned for the
  gradients of embeddings, require special handling.

  Args:
    grad: Gradient Tensor, IndexedSlices, or None.

  Returns:
    Optionally clipped gradient.
  """
  if grad is not None and self.hyperparams.gradient_clip_norm > 0:
    logging.info('Clipping gradient %s', grad)
    if isinstance(grad, tf.IndexedSlices):
      tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
      return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
    else:
      return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
  else:
    return grad
Example #3
Source File: TfEnas.py From rafiki with Apache License 2.0 | 6 votes |
def _optimize(self, loss, step, **knobs):
    opt_momentum = knobs['opt_momentum']      # Momentum optimizer momentum
    grad_clip_norm = knobs['grad_clip_norm']  # L2 norm to clip gradients by

    # Compute learning rate, gradients
    tf_trainable_vars = tf.trainable_variables()
    lr = self._get_learning_rate(step, **knobs)
    grads = tf.gradients(loss, tf_trainable_vars)
    self._mark_for_monitoring('lr', lr)

    # Clip gradients
    if grad_clip_norm > 0:
        grads = [tf.clip_by_norm(x, grad_clip_norm) for x in grads]

    # Init optimizer
    opt = tf.train.MomentumOptimizer(lr, opt_momentum, use_locking=True, use_nesterov=True)
    train_op = opt.apply_gradients(zip(grads, tf_trainable_vars), global_step=step)

    return train_op
Example #4
Source File: model.py From deeping-flow with MIT License | 6 votes |
def __init__(self, model, args):
    self.args = args

    with tf.variable_scope('supervisor_loss'):
        optimizer = tf.train.AdamOptimizer(
            args.ml_lr, beta1=0.9, beta2=0.98, epsilon=1e-8)

        loss = self.compute_loss(model)
        gradients = optimizer.compute_gradients(loss)

        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (
                    tf.clip_by_norm(grad, args.clip_norm), var)

        self.train_op = optimizer.apply_gradients(
            gradients, global_step=model.global_step)

        tf.summary.scalar('loss', loss)
        self.merged = tf.summary.merge_all()
Example #5
Source File: model.py From deeping-flow with MIT License | 6 votes |
def __init__(self, model, args):
    self.args = args

    with tf.variable_scope('mix_loss'):
        optimizer = tf.train.AdamOptimizer(
            args.lr, beta1=0.9, beta2=0.98, epsilon=1e-8)

        loss, reward, baseline, advantage = self.compute_loss(model)
        gradients = optimizer.compute_gradients(loss)

        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (
                    tf.clip_by_norm(grad, args.clip_norm), var)

        self.train_op = optimizer.apply_gradients(
            gradients, global_step=model.global_step)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar("reward", tf.reduce_mean(reward))
        tf.summary.scalar("baseline", tf.reduce_mean(baseline))
        tf.summary.scalar("advantage", tf.reduce_mean(advantage))
        self.merged = tf.summary.merge_all()
Example #6
Source File: model.py From deeping-flow with MIT License | 6 votes |
def __init__(self, model, args):
    self.args = args

    with tf.variable_scope('reinforced_loss'):
        optimizer = tf.train.AdamOptimizer(
            args.lr, beta1=0.9, beta2=0.98, epsilon=1e-8)

        loss, reward, baseline, advantage = self.compute_loss(model)
        gradients = optimizer.compute_gradients(loss)

        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (
                    tf.clip_by_norm(grad, args.clip_norm), var)

        self.train_op = optimizer.apply_gradients(
            gradients, global_step=model.global_step)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar("reward", tf.reduce_mean(reward))
        tf.summary.scalar("baseline", tf.reduce_mean(baseline))
        tf.summary.scalar("advantage", tf.reduce_mean(advantage))
        self.merged = tf.summary.merge_all()
Example #7
Source File: utils.py From youtube8mchallenge with Apache License 2.0 | 6 votes |
def clip_gradient_norms(gradients_to_variables, max_norm):
  """Clips the gradients by the given value.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  clipped_grads_and_vars = []
  for grad, var in gradients_to_variables:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        tmp = tf.clip_by_norm(grad.values, max_norm)
        grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        grad = tf.clip_by_norm(grad, max_norm)
    clipped_grads_and_vars.append((grad, var))
  return clipped_grads_and_vars
Example #8
Source File: utils.py From youtube-8m with Apache License 2.0 | 6 votes |
def clip_gradient_norms(gradients_to_variables, max_norm):
  """Clips the gradients by the given value.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  clipped_grads_and_vars = []
  for grad, var in gradients_to_variables:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        tmp = tf.clip_by_norm(grad.values, max_norm)
        grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        grad = tf.clip_by_norm(grad, max_norm)
    clipped_grads_and_vars.append((grad, var))
  return clipped_grads_and_vars
Example #9
Source File: local_optimizers.py From rlgraph with Apache License 2.0 | 6 votes |
def _graph_fn_calculate_gradients(self, variables, loss, time_percentage):
    """
    Args:
        variables (DataOpTuple): A list of variables to calculate gradients for.
        loss (SingleDataOp): The total loss over a batch to be minimized.
    """
    if get_backend() == "tf":
        var_list = list(variables.values()) if isinstance(variables, dict) else force_list(variables)
        grads_and_vars = self.optimizer.compute_gradients(
            loss=loss,
            var_list=var_list
        )
        if self.clip_grad_norm is not None:
            for i, (grad, var) in enumerate(grads_and_vars):
                if grad is not None:
                    grads_and_vars[i] = (tf.clip_by_norm(t=grad, clip_norm=self.clip_grad_norm), var)

        return DataOpTuple(grads_and_vars)
Example #10
Source File: tf_util.py From stable-baselines with MIT License | 6 votes |
def flatgrad(loss, var_list, clip_norm=None):
    """
    calculates the gradient and flattens it

    :param loss: (float) the loss value
    :param var_list: ([TensorFlow Tensor]) the variables
    :param clip_norm: (float) clip the gradients (disabled if None)
    :return: ([TensorFlow Tensor]) flattened gradient
    """
    grads = tf.gradients(loss, var_list)
    if clip_norm is not None:
        grads = [tf.clip_by_norm(grad, clip_norm=clip_norm) for grad in grads]
    return tf.concat(axis=0, values=[
        tf.reshape(grad if grad is not None else tf.zeros_like(v), [numel(v)])
        for (v, grad) in zip(var_list, grads)
    ])
Example #11
Source File: optimizer.py From BERT with Apache License 2.0 | 6 votes |
def grad_clip_fn(self, loss, tvars, **kargs):
    grads = tf.gradients(loss, tvars)
    grad_clip = self.config.get("grad_clip", "global_norm")
    tf.logging.info(" gradient clip method {}".format(grad_clip))
    if grad_clip == "global_norm":
        clip_norm = self.config.get("clip_norm", 1.0)
        [grads, _] = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
    elif grad_clip == "norm":
        clip_norm = self.config.get("clip_norm", 1.0)
        grads = [tf.clip_by_norm(grad, clip_norm) for grad in grads]
    elif grad_clip == "value":
        clip_min_value = self.config.get("clip_min_value", -1.0)
        clip_max_value = self.config.get("clip_max_value", 1.0)
        # Fixed: the original passed an undefined `clip_norm` to
        # tf.clip_by_value, which takes explicit min/max bounds.
        grads = [tf.clip_by_value(grad, clip_min_value, clip_max_value)
                 for grad in grads]
    return grads
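Example #11 above (and #12 below) dispatches between per-tensor clipping (tf.clip_by_norm) and joint clipping (tf.clip_by_global_norm). The difference is easy to see on two toy gradients; this is a sketch with made-up numbers, not code from either project:

import tensorflow as tf

g1 = tf.constant([3.0, 4.0])   # L2 norm = 5
g2 = tf.constant([0.0, 12.0])  # L2 norm = 12; global norm = sqrt(25 + 144) = 13

# "norm": each tensor is clipped independently; g1 is left unchanged.
per_tensor = [tf.clip_by_norm(g, 5.0) for g in (g1, g2)]

# "global_norm": both tensors are scaled by the same factor 5/13,
# preserving the direction of the overall update.
joint, _ = tf.clip_by_global_norm([g1, g2], clip_norm=5.0)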
Example #12
Source File: hvd_distributed_optimizer.py From BERT with Apache License 2.0 | 6 votes |
def grad_clip_fn(self, opt, loss, tvars, **kargs):
    grads_and_vars = opt.compute_gradients(loss, tvars)
    grads = [grad for grad, _ in grads_and_vars]
    grad_clip = self.config.get("grad_clip", "global_norm")
    tf.logging.info(" gradient clip method {}".format(grad_clip))
    if grad_clip == "global_norm":
        clip_norm = self.config.get("clip_norm", 1.0)
        [grads, _] = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
    elif grad_clip == "norm":
        clip_norm = self.config.get("clip_norm", 1.0)
        grads = [tf.clip_by_norm(grad, clip_norm) for grad in grads]
    elif grad_clip == "value":
        clip_min_value = self.config.get("clip_min_value", -1.0)
        clip_max_value = self.config.get("clip_max_value", 1.0)
        # Fixed: the original passed an undefined `clip_norm` to
        # tf.clip_by_value, which takes explicit min/max bounds.
        grads = [tf.clip_by_value(grad, clip_min_value, clip_max_value)
                 for grad in grads]
    return grads
Example #13
Source File: clip_ops_test.py From deep_image_model with Apache License 2.0 | 6 votes |
def testClipByNormClipped(self):
  # Norm clipping when clip_norm < 5
  with self.test_session():
    x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
    # Norm of x = sqrt(3^2 + 4^2) = 5
    np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]]
    clip_norm = 4.0
    ans = tf.clip_by_norm(x, clip_norm)
    tf_ans = ans.eval()

    clip_tensor = tf.constant(4.0)
    # Fixed: the original reused the Python float here, leaving
    # `clip_tensor` unused; the second assertion is meant to cover a
    # tensor-valued clip_norm.
    ans = tf.clip_by_norm(x, clip_tensor)
    tf_ans_tensor = ans.eval()

  self.assertAllClose(np_ans, tf_ans)
  self.assertAllClose(np_ans, tf_ans_tensor)
Example #14
Source File: model.py From Seq2Seq-Tensorflow with MIT License | 6 votes |
def build_model(self):
    self.build_memory()

    self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
    z = tf.matmul(self.hid[-1], self.W)

    # Keyword arguments added; newer TF versions require them here.
    self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)

    self.lr = tf.Variable(self.current_lr)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
    grads_and_vars = self.opt.compute_gradients(self.loss, params)
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1])
                              for gv in grads_and_vars]

    inc = self.global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()
Example #15
Source File: graph_builder.py From Gun-Detector with Apache License 2.0 | 6 votes |
def _clip_gradients(self, grad):
  """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

  Sparse tensors, in the form of IndexedSlices returned for the
  gradients of embeddings, require special handling.

  Args:
    grad: Gradient Tensor, IndexedSlices, or None.

  Returns:
    Optionally clipped gradient.
  """
  if grad is not None and self.hyperparams.gradient_clip_norm > 0:
    logging.info('Clipping gradient %s', grad)
    if isinstance(grad, tf.IndexedSlices):
      tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
      return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
    else:
      return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
  else:
    return grad
Example #16
Source File: train.py From text-gan-tensorflow with MIT License | 6 votes |
def set_train_op(loss, tvars):
    if FLAGS.optimizer_type == "sgd":
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
    elif FLAGS.optimizer_type == "rmsprop":
        optimizer = tf.train.RMSPropOptimizer(learning_rate=FLAGS.learning_rate)
    elif FLAGS.optimizer_type == "adam":
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
    else:
        raise ValueError("Wrong optimizer_type.")

    gradients = optimizer.compute_gradients(loss, var_list=tvars)
    clipped_gradients = [(grad if grad is None else tf.clip_by_norm(grad, FLAGS.max_grads), var)
                         for grad, var in gradients]
    train_op = optimizer.apply_gradients(clipped_gradients)
    return train_op
Example #17
Source File: models.py From Question_Answering_Models with MIT License | 6 votes |
def add_train_op(self, loss):
    """Builds the train op."""
    with tf.name_scope('train_op'):
        # Track the global training step
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        opt = tf.train.AdamOptimizer(self.config.lr)
        # train_op = opt.minimize(loss, self.global_step)
        train_variables = tf.trainable_variables()
        grads_vars = opt.compute_gradients(loss, train_variables)
        for i, (grad, var) in enumerate(grads_vars):
            grads_vars[i] = (
                tf.clip_by_norm(grad, self.config.grad_clip), var)
        train_op = opt.apply_gradients(
            grads_vars, global_step=self.global_step)
        return train_op
Example #18
Source File: tf_util.py From m3ddpg with MIT License | 6 votes |
def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
    """Minimizes `objective` using `optimizer` w.r.t. variables in
    `var_list` while ensuring the norm of the gradients for each
    variable is clipped to `clip_val`
    """
    if clip_val is None:
        return optimizer.minimize(objective, var_list=var_list)
    else:
        gradients = optimizer.compute_gradients(objective, var_list=var_list)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
        return optimizer.apply_gradients(gradients)

# ================================================================
# Global session
# ================================================================
Example #19
Source File: model.py From tensorflow_nlp with Apache License 2.0 | 6 votes |
def build_model(self):
    self.build_memory()

    self.W = tf.Variable(tf.random_normal([self.edim, 3], stddev=self.init_std))
    z = tf.matmul(self.hid[-1], self.W)

    self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)

    self.lr = tf.Variable(self.current_lr)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W, self.ASP, self.BL_W, self.BL_B]
    grads_and_vars = self.opt.compute_gradients(self.loss, params)
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1])
                              for gv in grads_and_vars]

    inc = self.global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

    tf.global_variables_initializer().run()

    self.correct_prediction = tf.argmax(z, 1)
Example #20
Source File: dqn_utils.py From deep-reinforcement-learning with MIT License | 5 votes |
def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
    """Minimizes `objective` using `optimizer` w.r.t. variables in
    `var_list` while ensuring the norm of the gradients for each
    variable is clipped to `clip_val`
    """
    gradients = optimizer.compute_gradients(objective, var_list=var_list)
    for i, (grad, var) in enumerate(gradients):
        if grad is not None:
            gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
    return optimizer.apply_gradients(gradients)
Example #21
Source File: runner.py From leo with Apache License 2.0 | 5 votes |
def _clip_gradients(gradients, gradient_threshold, gradient_norm_threshold):
  """Clips gradients by value and then by norm."""
  if gradient_threshold > 0:
    gradients = [
        tf.clip_by_value(g, -gradient_threshold, gradient_threshold)
        for g in gradients
    ]
  if gradient_norm_threshold > 0:
    gradients = [
        tf.clip_by_norm(g, gradient_norm_threshold) for g in gradients
    ]
  return gradients
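Example #21 applies value clipping before norm clipping. A quick sketch of why the order matters (the numbers are illustrative): element-wise value clipping can change a gradient's direction, after which norm clipping only rescales the result.

import tensorflow as tf

g = tf.constant([0.5, 8.0])
g = tf.clip_by_value(g, -1.0, 1.0)  # -> [0.5, 1.0]; direction changed
g = tf.clip_by_norm(g, 1.0)         # -> rescaled to unit L2 norm, ~[0.447, 0.894]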
Example #22
Source File: HRDE_Model_SA.py From QA_HRDE_LTC with MIT License | 5 votes |
def _create_optimizer(self):
    print('[launch] create optimizer')  # print() call; original was Python 2

    with tf.name_scope('optimizer') as scope:
        opt_func = tf.train.AdamOptimizer(learning_rate=self.lr)
        gvs = opt_func.compute_gradients(self.loss)
        capped_gvs = [(tf.clip_by_norm(t=grad, clip_norm=1), var) for grad, var in gvs]
        self.optimizer = opt_func.apply_gradients(grads_and_vars=capped_gvs, global_step=self.global_step)
Example #23
Source File: model.py From gconvRNN with MIT License | 5 votes |
def _build_optim(self):
    def minimize(loss, step, var_list, learning_rate, optimizer):
        if optimizer == "sgd":
            optim = tf.train.GradientDescentOptimizer(learning_rate)
        elif optimizer == "adam":
            optim = tf.train.AdamOptimizer(learning_rate)
        elif optimizer == "rmsprop":
            optim = tf.train.RMSPropOptimizer(learning_rate)
        else:
            raise Exception("[!] Unknown optimizer: {}".format(optimizer))

        ## Gradient clipping ##
        if self.max_grad_norm is not None:
            grads_and_vars = optim.compute_gradients(loss, var_list=var_list)
            new_grads_and_vars = []
            for idx, (grad, var) in enumerate(grads_and_vars):
                if grad is not None and var in var_list:
                    grad = tf.clip_by_norm(grad, self.max_grad_norm)
                    grad = tf.check_numerics(
                        grad, "Numerical error in gradient for {}".format(var.name))
                    new_grads_and_vars.append((grad, var))
            return optim.apply_gradients(new_grads_and_vars, global_step=step)
        else:
            grads_and_vars = optim.compute_gradients(loss, var_list=var_list)
            return optim.apply_gradients(grads_and_vars, global_step=step)

    # optim #
    self.model_optim = minimize(
        self.loss,
        self.model_step,
        self.model_vars,
        self.learning_rate,
        self.optimizer)
Example #24
Source File: model.py From neural-combinatorial-rl-tensorflow with MIT License | 5 votes |
def _build_optim(self):
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.dec_targets, logits=self.dec_pred_logits)
    inference_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.dec_targets, logits=self.dec_inference_logits)

    def apply_mask(op):
        length = tf.cast(op[:1], tf.int32)
        loss = op[1:]
        return tf.multiply(loss, tf.ones(length, dtype=tf.float32))

    batch_loss = tf.div(
        tf.reduce_sum(tf.multiply(losses, self.mask)),
        tf.reduce_sum(self.mask), name="batch_loss")

    # Fixed: the original summed `losses` here as well, leaving
    # `inference_losses` unused.
    batch_inference_loss = tf.div(
        tf.reduce_sum(tf.multiply(inference_losses, self.mask)),
        tf.reduce_sum(self.mask), name="batch_inference_loss")

    tf.losses.add_loss(batch_loss)
    total_loss = tf.losses.get_total_loss()

    self.total_loss = total_loss
    self.target_cross_entropy_losses = losses
    self.total_inference_loss = batch_inference_loss

    self.lr = tf.train.exponential_decay(
        self.lr_start, self.global_step, self.lr_decay_step,
        self.lr_decay_rate, staircase=True, name="learning_rate")

    optimizer = tf.train.AdamOptimizer(self.lr)

    if self.max_grad_norm is not None:
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        for idx, (grad, var) in enumerate(grads_and_vars):
            if grad is not None:
                grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_grad_norm), var)
        self.optim = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)
    else:
        self.optim = optimizer.minimize(self.total_loss, global_step=self.global_step)
Example #25
Source File: policy_gradient.py From blocks with GNU General Public License v3.0 | 5 votes |
def __init__(self, agent, policy_model, total_reward):
    self.agent = agent
    self.policy_model = policy_model
    self.total_reward = total_reward

    # Compute MLE loss function. MLE is used to initialize parameters for policy gradient
    self.mle_policy_gradient = MaximumLikelihoodEstimation(agent, policy_model)

    # Compute loss function
    loss, entropy_penalty = self.calc_loss(
        self.policy_model.model_output, self.policy_model.model_output_indices,
        self.policy_model.target)

    optimizer = tf.train.AdamOptimizer(AbstractLearning.rl_learning_rate)

    using_grad_clip = True
    grad_clip_val = 5.0
    if not using_grad_clip:
        train_step = optimizer.minimize(loss)
    else:
        gvs = optimizer.compute_gradients(loss)
        capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                      if grad is not None else (grad, var) for grad, var in gvs]
        train_step = optimizer.apply_gradients(capped_gvs)

    # Create summaries for training
    summary_loss = tf.scalar_summary("Loss", loss)
    summary_target_min = tf.scalar_summary("Target Min", tf.reduce_min(self.policy_model.target))
    summary_target_max = tf.scalar_summary("Target Max", tf.reduce_max(self.policy_model.target))
    summary_target_mean = tf.scalar_summary("Target Mean", tf.reduce_mean(self.policy_model.target))
    summary_entropy_penalty = tf.scalar_summary("Entropy Penalty", entropy_penalty)
    update_summaries = [summary_loss, summary_target_min, summary_target_max,
                        summary_target_mean, summary_entropy_penalty]

    AbstractLearning.__init__(self, policy_model, loss, train_step, update_summaries)
Example #26
Source File: ml_estimation.py From blocks with GNU General Public License v3.0 | 5 votes |
def __init__(self, agent, policy_model):
    self.agent = agent
    self.policy_model = policy_model

    # Replay memory
    max_replay_memory_size = 2000
    self.replay_memory = collections.deque(maxlen=max_replay_memory_size)

    rho = 0.5
    self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

    optimizer = tf.train.AdamOptimizer(self.mle_learning_rate)
    loss = MaximumLikelihoodEstimation.calc_loss(
        self.policy_model.model_output, self.policy_model.model_output_indices)

    using_grad_clip = True
    grad_clip_val = 5.0
    if not using_grad_clip:
        train_step = optimizer.minimize(loss)
    else:
        gvs = optimizer.compute_gradients(loss)
        capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                      if grad is not None else (grad, var) for grad, var in gvs]
        train_step = optimizer.apply_gradients(capped_gvs)

    # Create summaries for training
    summary_loss = tf.scalar_summary("Loss", loss)
    update_summaries = [summary_loss]

    AbstractLearning.__init__(self, policy_model, loss, train_step, update_summaries)
Example #27
Source File: policy_gradient_with_advantage.py From blocks with GNU General Public License v3.0 | 5 votes |
def __init__(self, agent, policy_model, state_value_model, total_reward):
    self.agent = agent
    self.policy_model = policy_model
    self.state_value_model = state_value_model
    self.total_reward = total_reward

    # Compute MLE loss function. MLE is used to initialize parameters for reinforce
    self.mle_policy_gradient = MaximumLikelihoodEstimation(agent, policy_model)

    # Compute reinforce loss function
    loss_reinforce, entropy_penalty = self.calc_loss(
        policy_model.model_output, policy_model.model_output_indices,
        policy_model.target)

    optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)

    using_grad_clip = True
    grad_clip_val = 5.0
    if not using_grad_clip:
        train_step = optimizer.minimize(loss_reinforce)
    else:
        gvs = optimizer.compute_gradients(loss_reinforce)
        capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                      if grad is not None else (grad, var) for grad, var in gvs]
        train_step = optimizer.apply_gradients(capped_gvs)

    # Create summaries for training
    summary_loss = tf.scalar_summary("Loss", loss_reinforce)
    summary_target_min = tf.scalar_summary("Target Min", tf.reduce_min(self.policy_model.target))
    summary_target_max = tf.scalar_summary("Target Max", tf.reduce_max(self.policy_model.target))
    summary_target_mean = tf.scalar_summary("Target Mean", tf.reduce_mean(self.policy_model.target))
    summary_entropy_penalty = tf.scalar_summary("Entropy Penalty", entropy_penalty)
    update_summaries = [summary_loss, summary_target_min, summary_target_max,
                        summary_target_mean, summary_entropy_penalty]

    AbstractLearning.__init__(self, policy_model, loss_reinforce, train_step, update_summaries)
Example #28
Source File: q_learning.py From blocks with GNU General Public License v3.0 | 5 votes |
def __init__(self, agent, q_network, target_q_network):
    """Creates constructor for an abstract learning setup"""
    self.agent = agent
    self.loss = None
    self.q_network = q_network
    self.target_q_network = target_q_network

    # Define epsilon greedy behaviour policy
    epsilon = 1.0
    min_epsilon = 0.1
    self.behaviour_policy = egp.EpsilonGreedyPolicy(epsilon, min_epsilon)

    # Replay memory and prioritized sweeping for sampling from the replay memory
    max_replay_memory_size = 2000
    self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
    rho = 0.5
    self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

    optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)
    loss = self.calc_loss(self.q_network.model_output,
                          self.q_network.model_output_indices,
                          self.q_network.target)

    using_grad_clip = True
    grad_clip_val = 5.0
    if not using_grad_clip:
        train_step = optimizer.minimize(loss)
    else:
        gvs = optimizer.compute_gradients(loss)
        capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                      if grad is not None else (grad, var) for grad, var in gvs]
        train_step = optimizer.apply_gradients(capped_gvs)

    # Create summaries for training
    summary_loss = tf.scalar_summary("Loss", loss)
    update_summaries = [summary_loss]

    AbstractLearning.__init__(self, q_network, loss, train_step, update_summaries)
Example #29
Source File: DE_Model_SA.py From QA_HRDE_LTC with MIT License | 5 votes |
def _create_optimizer(self):
    print('[launch] create optimizer')  # print() call; original was Python 2

    with tf.name_scope('optimizer') as scope:
        opt_func = tf.train.AdamOptimizer(learning_rate=self.lr)
        gvs = opt_func.compute_gradients(self.loss)
        capped_gvs = [(tf.clip_by_norm(t=grad, clip_norm=1), var) for grad, var in gvs]
        self.optimizer = opt_func.apply_gradients(grads_and_vars=capped_gvs, global_step=self.global_step)
Example #30
Source File: clipping_step.py From tensorforce with Apache License 2.0 | 5 votes |
def tf_step(self, variables, **kwargs):
    deltas = self.optimizer.step(variables=variables, **kwargs)

    with tf.control_dependencies(control_inputs=deltas):
        threshold = self.threshold.value()
        if self.mode == 'global_norm':
            clipped_deltas, update_norm = tf.clip_by_global_norm(
                t_list=deltas, clip_norm=threshold
            )
        else:
            update_norm = tf.linalg.global_norm(t_list=deltas)
            clipped_deltas = list()
            for delta in deltas:
                if self.mode == 'norm':
                    clipped_delta = tf.clip_by_norm(t=delta, clip_norm=threshold)
                elif self.mode == 'value':
                    clipped_delta = tf.clip_by_value(
                        t=delta, clip_value_min=-threshold, clip_value_max=threshold
                    )
                clipped_deltas.append(clipped_delta)

        clipped_deltas = self.add_summary(
            label='update-norm', name='update-norm-unclipped', tensor=update_norm,
            pass_tensors=clipped_deltas
        )

        # The inner optimizer has already applied the unclipped deltas, so
        # applying the difference (clipped - unclipped) leaves the variables
        # at exactly the clipped update.
        exceeding_deltas = list()
        for delta, clipped_delta in zip(deltas, clipped_deltas):
            exceeding_deltas.append(clipped_delta - delta)

        applied = self.apply_step(variables=variables, deltas=exceeding_deltas)

        with tf.control_dependencies(control_inputs=(applied,)):
            return util.fmap(function=util.identity_operation, xs=clipped_deltas)