Python tensorflow.identity() Examples
The following are 30 code examples of tensorflow.identity().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: modalities.py From fine-lm with MIT License | 6 votes |
def get_channel_embeddings(self, io_depth, targets, hidden_size, name="channel"):
    """Embed every channel of `targets` and concatenate the results.

    Args:
        io_depth: Number of channels; `targets` is split into this many pieces
            along axis 3.
        targets: Tensor of integer ids. It is split and squeezed along axis 3,
            so axis 3 must have size `io_depth`.
        hidden_size: Width of each per-channel embedding.
        name: Suffix used in the embedding variable name.

    Returns:
        The per-channel embeddings concatenated along the last axis.
    """
    per_channel_targets = tf.split(targets, io_depth, axis=3)
    embedding_table = tf.get_variable(
        "rgb_target_emb_%s" % name, [256 * io_depth, hidden_size])
    embedding_table = tf.identity(embedding_table)
    embedding_table *= float(hidden_size)**0.5
    embedded_channels = []
    for channel, channel_targets in enumerate(per_channel_targets):
        # The table has 256 * io_depth rows, one band of 256 rows per channel;
        # shift the ids so each channel indexes its own band.
        shifted_ids = tf.squeeze(channel_targets, axis=3) + channel * 256
        embedded_channels.append(
            common_layers.gather(embedding_table, shifted_ids))
    return tf.concat(embedded_channels, axis=-1)
Example #2
Source File: collect.py From fine-lm with MIT License | 6 votes |
def simulate(self, action):
    """Step the wrapped env, enqueueing the transition before updating obs.

    Args:
        action: Batch of actions forwarded to the wrapped batch environment.

    Returns:
        Tuple `(reward, done)` whose evaluation also triggers the enqueue and
        the observation update.
    """
    # Ordering is subtle. The collected record is
    #   obs, action = policy(obs), done, reward = env(obs, action)
    # so the current observation has to be enqueued *before* it is replaced
    # by the new one.
    reward, done = self._batch_env.simulate(action)
    with tf.control_dependencies([reward, done]):
        transition = [self._observ.read_value(), reward, done, action]
        enqueue_op = self.speculum.enqueue(transition)
    with tf.control_dependencies([enqueue_op]):
        update_observ = self._observ.assign(self._batch_env.observ)
    with tf.control_dependencies([update_observ]):
        return tf.identity(reward), tf.identity(done)
Example #3
Source File: py_func_batch_env.py From fine-lm with MIT License | 6 votes |
def simulate(self, action):
    """Step the batch of environments through a `tf.py_func` call.

    The new observation is written to `self._observ`; reward and done are
    returned.

    Args:
        action: Tensor holding the batch of actions to apply.

    Returns:
        Tuple `(reward, done)` of tensors, each with static shape
        `(len(self),)`. Evaluating them also updates `self._observ`.
    """
    float_dtypes = (tf.float16, tf.float32, tf.float64)
    with tf.name_scope('environment/simulate'):
        if action.dtype in float_dtypes:
            action = tf.check_numerics(action, 'action')
        observ_dtype = utils.parse_dtype(self._batch_env.observation_space)
        # Only the first three results of step() are used.
        observ, reward, done = tf.py_func(
            lambda a: self._batch_env.step(a)[:3],
            [action],
            [observ_dtype, tf.float32, tf.bool],
            name='step')
        observ = tf.check_numerics(observ, 'observ')
        reward = tf.check_numerics(reward, 'reward')
        # py_func outputs carry no static shape; restore the batch dimension.
        batch_shape = (len(self),)
        reward.set_shape(batch_shape)
        done.set_shape((len(self),))
        store_observ = self._observ.assign(observ)
        with tf.control_dependencies([store_observ]):
            return tf.identity(reward), tf.identity(done)
Example #4
Source File: py_func_batch_env.py From fine-lm with MIT License | 6 votes |
def _reset_non_empty(self, indices):
    """Reset the selected environments and store their fresh observations.

    Args:
        indices: Batch indices of the environments to reset.

    Returns:
        Batch tensor of the new observations; evaluating it also scatters
        them into `self._observ`.
    """
    observ_dtype = utils.parse_dtype(self._batch_env.observation_space)
    new_observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    new_observ = tf.check_numerics(new_observ, 'observ')
    store_observ = tf.scatter_update(self._observ, indices, new_observ)
    with tf.control_dependencies([store_observ]):
        return tf.identity(new_observ)
Example #5
Source File: in_graph_batch_env.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def reset(self, indices=None):
    """Reset (a subset of) the batch of environments.

    Args:
        indices: Batch indices of the environments to reset; `None` selects
            every environment in the batch.

    Returns:
        Batch tensor of the new observations. Evaluating it also writes the
        observations, zero rewards and `False` done flags into the
        corresponding variables.
    """
    if indices is None:
        indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    new_observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    new_observ = tf.check_numerics(new_observ, 'observ')
    zero_reward = tf.zeros_like(indices, tf.float32)
    not_done = tf.zeros_like(indices, tf.bool)
    state_updates = [
        tf.scatter_update(self._observ, indices, new_observ),
        tf.scatter_update(self._reward, indices, zero_reward),
        tf.scatter_update(self._done, indices, not_done),
    ]
    with tf.control_dependencies(state_updates):
        return tf.identity(new_observ)
Example #6
Source File: graphs.py From DOTA_models with Apache License 2.0 | 6 votes |
def _lm_loss(self,
             inputs,
             emb_key='lm_embedded',
             lstm_layer='lstm',
             lm_loss_layer='lm_loss',
             loss_name='lm_loss',
             compute_loss=True):
    """Run embedding and LSTM layers and optionally build the LM loss.

    The embedded tokens are stored in `self.tensors[emb_key]`.

    Args:
        inputs: Object providing `tokens`, `state`, `length`, `labels`,
            `weights` and a `save_state` method.
        emb_key: Key under which the embedded inputs are recorded.
        lstm_layer: Key of the LSTM layer in `self.layers`.
        lm_loss_layer: Key of the loss layer in `self.layers`.
        loss_name: Name of the scalar summary for the loss.
        compute_loss: When false, no loss is built.

    Returns:
        The loss tensor (tied to saving the next LSTM state), or `None` when
        `compute_loss` is false.
    """
    embedded = self.layers['embedding'](inputs.tokens)
    self.tensors[emb_key] = embedded
    lstm_out, next_state = self.layers[lstm_layer](
        embedded, inputs.state, inputs.length)
    if not compute_loss:
        return None
    loss = self.layers[lm_loss_layer](
        [lstm_out, inputs.labels, inputs.weights])
    # Make sure the LSTM state is saved whenever the loss is evaluated.
    save_state_op = inputs.save_state(next_state)
    with tf.control_dependencies([save_state_op]):
        loss = tf.identity(loss)
        tf.summary.scalar(loss_name, loss)
    return loss
Example #7
Source File: optimize.py From fine-lm with MIT License | 6 votes |
def weight_decay_and_noise(loss, hparams, learning_rate, var_list=None):
    """Add the weight-decay term to `loss` and attach the weight-noise ops.

    Args:
        loss: Loss tensor to augment.
        hparams: Object providing `weight_decay` and `weight_noise`.
        learning_rate: Passed through to `weight_noise`.
        var_list: Variables to consider; defaults to all trainable variables.

    Returns:
        `loss` plus the weight-decay loss, with control dependencies on the
        weight-noise ops.
    """
    if var_list is None:
        var_list = tf.trainable_variables()

    decay_vars = list(var_list)
    # Noise is applied only to variables whose name contains "/body/".
    noise_vars = [var for var in var_list if "/body/" in var.name]

    decay_loss = weight_decay(hparams.weight_decay, decay_vars)
    if hparams.weight_decay:
        tf.summary.scalar("losses/weight_decay", decay_loss)
    noise_ops = weight_noise(hparams.weight_noise, learning_rate, noise_vars)

    with tf.control_dependencies(noise_ops):
        loss = tf.identity(loss)

    loss += decay_loss
    return loss
Example #8
Source File: in_graph_batch_env.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def reset(self, indices=None):
    """Reset (a subset of) the batch of environments.

    Args:
        indices: Batch indices of the environments to reset; `None` selects
            every environment in the batch.

    Returns:
        Batch tensor of the new observations. Evaluating it also writes the
        observations, zero rewards and `False` done flags into the
        corresponding variables.
    """
    if indices is None:
        indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    fresh_observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    fresh_observ = tf.check_numerics(fresh_observ, 'observ')
    reward_reset = tf.zeros_like(indices, tf.float32)
    done_reset = tf.zeros_like(indices, tf.bool)
    writes = [
        tf.scatter_update(self._observ, indices, fresh_observ),
        tf.scatter_update(self._reward, indices, reward_reset),
        tf.scatter_update(self._done, indices, done_reset),
    ]
    with tf.control_dependencies(writes):
        return tf.identity(fresh_observ)
Example #9
Source File: tf_atari_wrappers.py From fine-lm with MIT License | 6 votes |
def simulate(self, action):
    """Repeat `action` for `self.skip` steps and stack the observations.

    Rewards are summed and done flags OR-ed across the skipped steps; the
    per-step observations are concatenated along the last axis and stored in
    `self._observ`.

    Args:
        action: Batch of actions, applied unchanged at every skipped step.

    Returns:
        Tuple `(reward, done)` taken from the last scan step.
    """
    with tf.name_scope("environment/simulate"):  # Do we need this?
        batch = len(self)
        initializer = (tf.zeros(self.old_shape, dtype=tf.float32),
                       tf.fill((batch,), 0.0),
                       tf.fill((len(self),), False))

        def not_done_step(acc, _):
            reward, done = self._batch_env.simulate(action)
            with tf.control_dependencies([reward, done]):
                # "+ 0" creates a new tensor from the observation variable.
                step_observ = self._batch_env.observ + 0
                total_reward = tf.add(acc[1], reward)
                any_done = tf.logical_or(acc[2], done)
                return (step_observ, total_reward, any_done)

        observations, rewards, dones = tf.scan(
            not_done_step,
            tf.range(self.skip),
            initializer=initializer,
            parallel_iterations=1,
            infer_shape=False)
        frames = tf.split(observations, self.skip, axis=0)
        frames = [tf.squeeze(frame, axis=0) for frame in frames]
        stacked = tf.concat(frames, axis=-1)
        with tf.control_dependencies([self._observ.assign(stacked)]):
            return tf.identity(rewards[-1, ...]), tf.identity(dones[-1, ...])
Example #10
Source File: algorithm.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def _update_value_step(self, observ, reward, length):
    """Compute the value loss and apply one gradient step on it.

    Args:
        observ: Sequences of observations.
        reward: Sequences of reward.
        length: Batch of sequence lengths.

    Returns:
        List `[loss, summary]`; both tensors depend on the optimizer step.
    """
    loss, loss_summary = self._value_loss(observ, reward, length)
    grads_and_vars = self._value_optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    train_op = self._value_optimizer.apply_gradients(
        zip(gradients, variables))
    summary = tf.summary.merge([
        loss_summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(
            zip(gradients, variables), dict(value=r'.*')),
    ])
    with tf.control_dependencies([train_op]):
        return [tf.identity(loss), tf.identity(summary)]
Example #11
Source File: common_image_attention.py From fine-lm with MIT License | 6 votes |
def get_channel_embeddings(io_depth, targets, hidden_size, name="channel"):
    """Embed every channel of `targets` and concatenate the results.

    Args:
        io_depth: Number of channels; `targets` is split into this many pieces
            along axis 3.
        targets: Tensor of integer ids. It is split and squeezed along axis 3,
            so axis 3 must have size `io_depth`.
        hidden_size: Width of each per-channel embedding.
        name: Suffix used in the embedding variable name.

    Returns:
        The per-channel embeddings concatenated along the last axis.
    """
    channel_slices = tf.split(targets, io_depth, axis=3)
    table = tf.get_variable(
        "rgb_target_emb_%s" % name, [256 * io_depth, hidden_size])
    table = tf.identity(table)
    table *= float(hidden_size)**0.5
    embeddings = []
    for index, channel_slice in enumerate(channel_slices):
        # Each channel owns a band of 256 rows in the table, so offset the
        # ids by the channel index to land in the right band.
        ids = tf.squeeze(channel_slice, axis=3) + index * 256
        embeddings.append(common_layers.gather(table, ids))
    return tf.concat(embeddings, axis=-1)
Example #12
Source File: component.py From DOTA_models with Apache License 2.0 | 6 votes |
def build_structured_training(self, state, network_states):
    """Build a placeholder graph for beam-search based training.

    This default implementation is not functional: the returned handle depends
    on a `tf.Assert(False, ...)`, so evaluating it fails at runtime with
    'Not implemented.'.

    Args:
        state: MasterState from the 'AdvanceMaster' op that advances the
            underlying master to this component.
        network_states: Dictionary of component NetworkState objects (unused).

    Returns:
        (handle, cost, correct, total): the handle guarded by the failing
        assertion, plus constant-zero dummy values for cost, number of correct
        actions and total number of actions.
    """
    del network_states  # Unused.
    not_implemented = tf.Assert(False, ['Not implemented.'])
    with tf.control_dependencies([not_implemented]):
        handle = tf.identity(state.handle)
    cost = tf.constant(0.)
    correct = tf.constant(0)
    total = tf.constant(0)
    return handle, cost, correct, total
Example #13
Source File: utils.py From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0 | 6 votes |
def calculate_mmd(x, y, param, batch_size):
    """Combine the Gaussian and logistic MMD terms with weights from `param`.

    `x` and `y` are repeated `batch_size` times along axis 1, and their
    transposes `batch_size` times along axis 0, before being handed to the
    two kernel-specific helpers.

    Args:
        x: First input tensor.
        y: Second input tensor.
        param: Mapping with at least the keys 'logistic' and 'gaussian'
            (the mixing weights); it is also forwarded to the helpers.
        batch_size: Number of repetitions.

    Returns:
        The weighted sum of the logistic and Gaussian MMD values.
    """
    x_t = tf.transpose(x)
    y_t = tf.transpose(y)
    x_wide = tf.identity(x)
    y_wide = tf.identity(y)
    x_tall = tf.identity(x_t)
    y_tall = tf.identity(y_t)
    for _ in range(batch_size - 1):
        x_wide = tf.concat([x_wide, x], axis=1)
        y_wide = tf.concat([y_wide, y], axis=1)
        x_tall = tf.concat([x_tall, x_t], axis=0)
        y_tall = tf.concat([y_tall, y_t], axis=0)
    gaussian_term = calculate_gaussian_mmd(
        x_wide, y_wide, x_tall, y_tall, param, batch_size)
    logistic_term = calculate_logistic_mmd(
        x_wide, y_wide, x_tall, y_tall, param, batch_size)
    return param['logistic'] * logistic_term + param['gaussian'] * gaussian_term
Example #14
Source File: build.py From Traffic_sign_detection_YOLO with MIT License | 6 votes |
def build_forward(self):
    """Build the forward pass by chaining one op per darknet layer.

    Creates the input placeholder (`self.inp`) and the feed dict
    (`self.feed`), threads the state through every layer of
    `self.darknet.layers`, and exposes the result as `self.top` and
    `self.out` (an identity op named 'output').
    """
    verbalise = self.FLAGS.verbalise  # read but not used in this method

    # Placeholders
    input_shape = [None] + self.meta['inp_size']
    self.inp = tf.placeholder(tf.float32, input_shape, 'input')
    self.feed = dict()  # other placeholders

    # Build the forward pass
    state = identity(self.inp)
    roof = self.num_layer - self.ntrain
    self.say(HEADER, LINE)
    for index, layer in enumerate(self.darknet.layers):
        scope = '{}-{}'.format(str(index), layer.type)  # computed but unused
        state = op_create(layer, state, index, roof, self.feed)
        self.say(state.verbalise())
    self.say(LINE)

    self.top = state
    self.out = tf.identity(state.out, name='output')
Example #15
Source File: tfutil.py From disentangling_conditional_gans with MIT License | 6 votes |
def autosummary(name, value):
    """Record `value` under `name` for autosummary reporting and return it.

    Args:
        name: Summary name; '/' is replaced by '_' when forming scope names.
        value: Either a TensorFlow expression or a Python scalar / numpy
            array.

    Returns:
        For TensorFlow expressions, an identity of `value` that depends on the
        summary update op. Otherwise `value` itself, after the update op has
        been run immediately.
    """
    scope_id = name.replace('/', '_')
    if is_tf_expression(value):
        with tf.name_scope('summary_' + scope_id), tf.device(value.device):
            update_op = _create_autosummary_var(name, value)
            with tf.control_dependencies([update_op]):
                return tf.identity(value)

    # Python scalar or numpy array: feed it through a cached placeholder.
    if name not in _autosummary_immediate:
        with absolute_name_scope('Autosummary/' + scope_id), \
                tf.device(None), tf.control_dependencies(None):
            update_value = tf.placeholder(tf.float32)
            update_op = _create_autosummary_var(name, update_value)
            _autosummary_immediate[name] = update_op, update_value
    update_op, update_value = _autosummary_immediate[name]
    run(update_op, {update_value: np.float32(value)})
    return value

# Create the necessary ops to include autosummaries in TensorBoard report.
# Note: This should be done only once per graph.
Example #16
Source File: build.py From Automatic-Identification-and-Counting-of-Blood-Cells with GNU General Public License v3.0 | 6 votes |
def build_forward(self):
    """Build the forward pass by chaining one op per darknet layer.

    Creates the input placeholder (`self.inp`) and the feed dict
    (`self.feed`), threads the state through every layer of
    `self.darknet.layers`, and exposes the result as `self.top` and
    `self.out` (an identity op named 'output').
    """
    verbalise = self.FLAGS.verbalise  # read but not used in this method

    # Placeholders
    placeholder_shape = [None] + self.meta['inp_size']
    self.inp = tf.placeholder(tf.float32, placeholder_shape, 'input')
    self.feed = dict()  # other placeholders

    # Build the forward pass
    state = identity(self.inp)
    roof = self.num_layer - self.ntrain
    self.say(HEADER, LINE)
    for layer_idx, layer in enumerate(self.darknet.layers):
        scope = '{}-{}'.format(str(layer_idx), layer.type)  # computed but unused
        state = op_create(layer, state, layer_idx, roof, self.feed)
        self.say(state.verbalise())
    self.say(LINE)

    self.top = state
    self.out = tf.identity(state.out, name='output')
Example #17
Source File: tf_atari_wrappers.py From fine-lm with MIT License | 6 votes |
def simulate(self, action):
    """Repeat `action` for `self.skip` steps, keeping the max observation.

    Across the skipped steps the observations are reduced with an
    element-wise maximum, rewards are summed and done flags OR-ed. The
    resulting observation is stored in `self._observ`.

    Args:
        action: Batch of actions, applied unchanged at every skipped step.

    Returns:
        Tuple `(reward, done)` taken from the last scan step.
    """
    with tf.name_scope("environment/simulate"):  # Do we need this?
        batch = len(self)
        initializer = (tf.zeros_like(self._observ),
                       tf.fill((batch,), 0.0),
                       tf.fill((len(self),), False))

        def not_done_step(acc, _):
            reward, done = self._batch_env.simulate(action)
            with tf.control_dependencies([reward, done]):
                # TODO(piotrmilos): possibly ignore envs with done
                max_observ = tf.maximum(acc[0], self._batch_env.observ)
                total_reward = tf.add(acc[1], reward)
                any_done = tf.logical_or(acc[2], done)
                return (max_observ, total_reward, any_done)

        scanned = tf.scan(
            not_done_step,
            tf.range(self.skip),
            initializer=initializer,
            parallel_iterations=1,
            infer_shape=False)
        # Keep only the accumulated values from the final step.
        final_observ, final_reward, final_done = [
            ret[-1, ...] for ret in scanned]

        with tf.control_dependencies([self._observ.assign(final_observ)]):
            return tf.identity(final_reward), tf.identity(final_done)
Example #18
Source File: yellowfin.py From fine-lm with MIT License | 5 votes |
def _curvature_range(self):
    """Build the ops tracking the curvature range.

    Stores the log of the squared gradient norm in a sliding window, takes
    the min/max over the valid part of that window, and sets `self._h_min` /
    `self._h_max` to the exponentiated moving averages of those extremes.

    Returns:
        List containing the moving-average update op.
    """
    window_width = self.curvature_window_width
    self._curv_win = tf.get_variable(
        "curv_win",
        dtype=tf.float32,
        trainable=False,
        shape=[window_width,],
        initializer=tf.zeros_initializer)
    # We use log smoothing for curvature range
    slot = self._step % window_width
    self._curv_win = tf.scatter_update(
        self._curv_win, slot, tf.log(self._grad_norm_squared))
    # Note here the iterations start from iteration 0
    window_begin = tf.constant([0,])
    num_valid = tf.minimum(tf.constant(window_width), self._step + 1)
    valid_window = tf.slice(
        self._curv_win, window_begin, tf.expand_dims(num_valid, dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        with tf.control_dependencies([avg_op]):
            self._h_min = tf.exp(
                tf.identity(self._moving_averager.average(self._h_min_t)))
            self._h_max = tf.exp(
                tf.identity(self._moving_averager.average(self._h_max_t)))
            if self._sparsity_debias:
                self._h_min *= self._sparsity_avg
                self._h_max *= self._sparsity_avg
    curv_range_ops = [avg_op]
    return curv_range_ops  # h_max_t, h_min_t
Example #19
Source File: loop.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def _define_step(self, done, score, summary):
    """Combine the operations of one phase step.

    Submits the scores of finished entries to a streaming mean, reads (and
    clears) that mean when `self._report` is true, and advances the step
    counter by the number of scores.

    Args:
        done: Tensor indicating whether current score can be used.
        score: Tensor holding the current, possibly intermediate, score.
        summary: Tensor holding summary string to write if not an empty
            string.

    Returns:
        Tuple `(summary, mean_score, next_step, steps_made)`. The mean score
        is zero for non reporting steps.
    """
    # Promote scalars to length-one vectors so the batch logic applies.
    if done.shape.ndims == 0:
        done = done[None]
    if score.shape.ndims == 0:
        score = score[None]
    score_mean = streaming_mean.StreamingMean((), tf.float32)
    with tf.control_dependencies([done, score, summary]):
        finished_scores = tf.gather(score, tf.where(done)[:, 0])
        submit_score = tf.cond(
            tf.reduce_any(done),
            lambda: score_mean.submit(finished_scores),
            tf.no_op)
    with tf.control_dependencies([submit_score]):
        # `float` called without arguments yields 0.0 on non-reporting steps.
        mean_score = tf.cond(self._report, score_mean.clear, float)
        steps_made = tf.shape(score)[0]
        next_step = self._step.assign_add(steps_made)
    with tf.control_dependencies([mean_score, next_step]):
        return tf.identity(summary), mean_score, next_step, steps_made
Example #20
Source File: in_graph_env.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def reset(self):
    """Reset the environment and the cached step variables.

    Returns:
        Tensor of the current observation. Evaluating it also stores the
        observation and resets the reward to 0 and the done flag to False.
    """
    observ_dtype = self._parse_dtype(self._env.observation_space)
    first_observ = tf.py_func(
        self._env.reset, [], observ_dtype, name='reset')
    first_observ = tf.check_numerics(first_observ, 'observ')
    state_resets = [
        self._observ.assign(first_observ),
        self._reward.assign(0),
        self._done.assign(False),
    ]
    with tf.control_dependencies(state_resets):
        return tf.identity(first_observ)
Example #21
Source File: discretization.py From fine-lm with MIT License | 5 votes |
def vq_discrete_bottleneck(x,
                           bottleneck_bits,
                           beta=0.25,
                           decay=0.999,
                           epsilon=1e-5,
                           soft_em=False,
                           num_samples=10):
    """Simple vector-quantized discrete bottleneck with EMA codebook updates.

    Args:
        x: Input tensor; its last dimension is used as the hidden size.
        bottleneck_bits: The codebook has `2**bottleneck_bits` entries.
        beta: Multiplier applied to the nearest-neighbor loss.
        decay: Decay of the moving averages.
        epsilon: Smoothing constant used when normalising the EMA counts.
        soft_em: Forwarded to `vq_nearest_neighbor`.
        num_samples: Forwarded to `vq_nearest_neighbor`.

    Returns:
        Tuple `(d, loss)`: the assignment tensor reshaped to the leading
        dimensions of `x` plus a codebook-size axis, and the scaled loss
        (which depends on the codebook update).
    """
    bottleneck_size = 2**bottleneck_bits
    x_shape = common_layers.shape_list(x)
    hidden_size = x_shape[-1]
    means, ema_means, ema_count = get_vq_bottleneck(bottleneck_size,
                                                    hidden_size)
    x = tf.reshape(x, [-1, hidden_size])
    x_means_hot, e_loss = vq_nearest_neighbor(
        x, means, soft_em=soft_em, num_samples=num_samples)

    # Update the ema variables
    assignment_counts = tf.reduce_sum(
        tf.reshape(x_means_hot, shape=[-1, bottleneck_size]), axis=0)
    updated_ema_count = moving_averages.assign_moving_average(
        ema_count, assignment_counts, decay, zero_debias=False)

    dw = tf.matmul(x_means_hot, x, transpose_a=True)
    updated_ema_means = tf.identity(
        moving_averages.assign_moving_average(
            ema_means, dw, decay, zero_debias=False))
    n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
    # Smooth the counts with epsilon while keeping their total equal to n.
    updated_ema_count = (
        (updated_ema_count + epsilon) / (n + bottleneck_size * epsilon) * n)
    updated_ema_means /= tf.expand_dims(updated_ema_count, axis=-1)

    with tf.control_dependencies([e_loss]):
        update_means = means.assign(updated_ema_means)
        with tf.control_dependencies([update_means]):
            loss = beta * e_loss

    d = tf.reshape(x_means_hot, x_shape[:-1] + [bottleneck_size])
    return d, loss
Example #22
Source File: adversary.py From lirpg with MIT License | 5 votes |
def build_graph(self, obs_ph, acs_ph, reuse=False):
    """Build the classifier network over (observation, action) pairs.

    Observations are standardised with a running mean/std, concatenated with
    the actions along axis 1, and passed through two tanh hidden layers of
    width `self.hidden_size` followed by a single linear output unit.

    Args:
        obs_ph: Observation tensor.
        acs_ph: Action tensor, concatenated with the observations on axis 1.
        reuse: When true, reuse the variables of `self.scope`.

    Returns:
        The logits tensor produced by the final one-unit layer.
    """
    with tf.variable_scope(self.scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        with tf.variable_scope("obfilter"):
            self.obs_rms = RunningMeanStd(shape=self.observation_shape)
        # Standardise: subtract the mean first, THEN divide by the std. The
        # previous expression `obs_ph - mean / std` only divided the mean by
        # the std because `/` binds tighter than `-`.
        obs = (obs_ph - self.obs_rms.mean) / self.obs_rms.std
        # Concatenate the two inputs to form a transition.
        _input = tf.concat([obs, acs_ph], axis=1)
        p_h1 = tf.contrib.layers.fully_connected(
            _input, self.hidden_size, activation_fn=tf.nn.tanh)
        p_h2 = tf.contrib.layers.fully_connected(
            p_h1, self.hidden_size, activation_fn=tf.nn.tanh)
        logits = tf.contrib.layers.fully_connected(
            p_h2, 1, activation_fn=tf.identity)
    return logits
Example #23
Source File: t2t_model.py From fine-lm with MIT License | 5 votes |
def _shard_features(self, features):  # pylint: disable=missing-docstring
    # Splits every feature along axis 0 into `self._num_datashards` pieces.
    # Scalars and length-1 vectors are first tiled so that each shard
    # receives a copy.
    sharded = {}
    for key, value in sorted(six.iteritems(features)):
        value = tf.convert_to_tensor(value)
        shape = common_layers.shape_list(value)
        if not shape:
            # Scalar: promote to a length-1 vector so it can be tiled.
            value = tf.expand_dims(value, axis=-1)
            shape = [1]
        if shape == [1]:
            value = tf.tile(value, tf.to_int32([self._num_datashards]))
        shards = tf.split(value, self._num_datashards, 0)
        sharded[key] = self._data_parallelism(tf.identity, shards)
    return sharded
Example #24
Source File: algorithm.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def _update_step(
        self, observ, action, old_mean, old_logstd, reward, advantage, length):
    """Compute the value and policy losses and apply one joint gradient step.

    Args:
        observ: Sequences of observations.
        action: Sequences of actions.
        old_mean: Sequences of action means of the behavioral policy.
        old_logstd: Sequences of action log stddevs of the behavioral policy.
        reward: Sequences of reward.
        advantage: Sequences of advantages.
        length: Batch of sequence lengths.

    Returns:
        List `[value_loss, policy_loss, summary]`; every tensor depends on the
        optimizer step.
    """
    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)
    value_gradients, value_variables = zip(
        *self._optimizer.compute_gradients(value_loss))
    policy_gradients, policy_variables = zip(
        *self._optimizer.compute_gradients(policy_loss))
    # Both gradient sets are applied together in a single optimizer step.
    all_gradients = value_gradients + policy_gradients
    all_variables = value_variables + policy_variables
    train_op = self._optimizer.apply_gradients(
        zip(all_gradients, all_variables))
    summary = tf.summary.merge([
        value_summary,
        policy_summary,
        tf.summary.scalar(
            'value_gradient_norm', tf.global_norm(value_gradients)),
        tf.summary.scalar(
            'policy_gradient_norm', tf.global_norm(policy_gradients)),
        utility.gradient_summaries(
            zip(value_gradients, value_variables), dict(value=r'.*')),
        utility.gradient_summaries(
            zip(policy_gradients, policy_variables), dict(policy=r'.*')),
    ])
    with tf.control_dependencies([train_op]):
        outputs = (value_loss, policy_loss, summary)
        return [tf.identity(tensor) for tensor in outputs]
Example #25
Source File: algorithm.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def _training(self):
    """Run the training updates on the collected episodes, then clear memory.

    The graph asserts that the memory is full and that all sequence lengths
    are positive. It then runs the update steps, the penalty adjustment and
    the memory reset in sequence.

    Returns:
        Summary tensor merging the update, penalty and weight summaries.
    """
    with tf.name_scope('training'):
        memory_full = tf.assert_equal(
            self._memory_index, self._config.update_every)
        with tf.control_dependencies([memory_full]):
            data = self._memory.data()
        (observ, action, old_mean, old_logstd, reward), length = data
        positive_length = tf.assert_greater(length, 0)
        with tf.control_dependencies([positive_length]):
            length = tf.identity(length)
        observ = self._observ_filter.transform(observ)
        reward = self._reward_filter.transform(reward)
        update_summary = self._perform_update_steps(
            observ, action, old_mean, old_logstd, reward, length)
        # Each stage below waits for the previous one to finish.
        with tf.control_dependencies([update_summary]):
            penalty_summary = self._adjust_penalty(
                observ, old_mean, old_logstd, length)
        with tf.control_dependencies([penalty_summary]):
            clear_memory = tf.group(
                self._memory.clear(), self._memory_index.assign(0))
        with tf.control_dependencies([clear_memory]):
            weight_summary = utility.variable_summaries(
                tf.trainable_variables(), self._config.weight_summaries)
            return tf.summary.merge(
                [update_summary, penalty_summary, weight_summary])
Example #26
Source File: tf_atari_wrappers.py From fine-lm with MIT License | 5 votes |
def simulate(self, action):
    """Step the wrapped env and store its observation in bit-unpacked form.

    Args:
        action: Batch of actions; also printed via `tf.Print` for debugging.

    Returns:
        Tuple `(reward, done)`; evaluating them also updates `self._observ`.
    """
    action = tf.Print(action, [action], message="action=", summarize=200)

    # action = tf.zeros_like(action) #Temporary hacked bugfix
    reward, done = self._batch_env.simulate(action)
    with tf.control_dependencies([reward, done]):
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            bits = discretization.int_to_bit(self._batch_env.observ, 8)
            bits = tf.reshape(bits, (-1,)+self.observ_shape)
            store_observ = self._observ.assign(bits)
            with tf.control_dependencies([store_observ]):
                return tf.identity(reward), tf.identity(done)
Example #27
Source File: streaming_mean.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def clear(self):
    """Return the current mean estimate and zero the running statistics."""
    mean = self._sum / tf.cast(self._count, self._dtype)
    # The mean must be computed before the accumulators are zeroed.
    with tf.control_dependencies([mean]):
        zero_sum = self._sum.assign(tf.zeros_like(self._sum))
        zero_count = self._count.assign(0)
    with tf.control_dependencies([zero_sum, zero_count]):
        return tf.identity(mean)
Example #28
Source File: algorithm.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def _update_policy_step(
        self, observ, action, old_mean, old_logstd, advantage, length):
    """Compute the policy loss and apply one gradient step on it.

    Args:
        observ: Sequences of observations.
        action: Sequences of actions.
        old_mean: Sequences of action means of the behavioral policy.
        old_logstd: Sequences of action log stddevs of the behavioral policy.
        advantage: Sequences of advantages.
        length: Batch of sequence lengths.

    Returns:
        List `[loss, summary]`; both tensors depend on the optimizer step.
    """
    network = self._network(observ, length)
    loss, loss_summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)
    grads_and_vars = self._policy_optimizer.compute_gradients(loss)
    gradients, variables = zip(*grads_and_vars)
    train_op = self._policy_optimizer.apply_gradients(
        zip(gradients, variables))
    summary = tf.summary.merge([
        loss_summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(
            zip(gradients, variables), dict(policy=r'.*')),
    ])
    with tf.control_dependencies([train_op]):
        return [tf.identity(loss), tf.identity(summary)]
Example #29
Source File: algorithm.py From soccer-matlab with BSD 2-Clause "Simplified" License | 5 votes |
def _training(self):
    """Run policy and value training on the collected episodes, then clear.

    The graph asserts that the memory is full and that all sequence lengths
    are positive. It then runs the policy update, the value update, the
    penalty adjustment and the memory reset in sequence.

    Returns:
        Summary tensor merging the policy, value, penalty and weight
        summaries.
    """
    with tf.name_scope('training'):
        memory_full = tf.assert_equal(
            self._memory_index, self._config.update_every)
        with tf.control_dependencies([memory_full]):
            data = self._memory.data()
        (observ, action, old_mean, old_logstd, reward), length = data
        positive_length = tf.assert_greater(length, 0)
        with tf.control_dependencies([positive_length]):
            length = tf.identity(length)
        observ = self._observ_filter.transform(observ)
        reward = self._reward_filter.transform(reward)
        policy_summary = self._update_policy(
            observ, action, old_mean, old_logstd, reward, length)
        # Each stage below waits for the previous one to finish.
        with tf.control_dependencies([policy_summary]):
            value_summary = self._update_value(observ, reward, length)
        with tf.control_dependencies([value_summary]):
            penalty_summary = self._adjust_penalty(
                observ, old_mean, old_logstd, length)
        with tf.control_dependencies([penalty_summary]):
            clear_memory = tf.group(
                self._memory.clear(), self._memory_index.assign(0))
        with tf.control_dependencies([clear_memory]):
            weight_summary = utility.variable_summaries(
                tf.trainable_variables(), self._config.weight_summaries)
            return tf.summary.merge([
                policy_summary, value_summary, penalty_summary,
                weight_summary])
Example #30
Source File: utils.py From DOTA_models with Apache License 2.0 | 5 votes |
def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
           normalized=False, name=None, collections=None):
    """Linear (affine) transformation, y = x W + b, in several configurations.

    Args:
        x: The input tensor to transform. Its second dimension (index 1) must
            be statically known.
        out_size: The integer size of the non-batch output dimension.
        do_bias (optional): Add a learnable bias vector to the operation.
        alpha (optional): A multiplicative scaling for the weight
            initialization of the matrix, in the form
            alpha * 1/sqrt(x.shape[1]). Forwarded to `init_linear`.
        identity_if_possible (optional): Just return identity, if
            x.shape[1] == out_size.
        normalized (optional): Option to divide out by the norms of the rows
            of W.
        name (optional): The name prefix to add to variables.
        collections (optional): List of additional collections. (Placed in
            tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)

    Returns:
        In the equation, y = x W + b, returns the tensorflow op that yields y.
    """
    in_size = int(x.get_shape()[1])  # from Dimension(10) -> 10

    if identity_if_possible and in_size == out_size:
        # Sometimes linear layers are nothing more than size adapters.
        wname = (name + "/W") if name else "/W"
        return tf.identity(x, name=(wname + '_ident'))

    # The weight initialisation (including the alpha scaling) is handled by
    # init_linear.
    W, b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
                       normalized=normalized, name=name,
                       collections=collections)

    y = tf.matmul(x, W)
    return y + b if do_bias else y