Python tensorflow.clip_by_value() Examples
The following are 30 code examples of tensorflow.clip_by_value(), drawn from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the tensorflow module, or try the search function.
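Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what tf.clip_by_value does: it bounds every element of a tensor to the closed range [clip_value_min, clip_value_max], leaving in-range values unchanged. The tensor values and the TF 1.x Session usage are purely illustrative.

import tensorflow as tf

t = tf.constant([-2.0, 0.5, 3.0])
# Every element is forced into [0.0, 1.0]; 0.5 passes through unchanged.
clipped = tf.clip_by_value(t, clip_value_min=0.0, clip_value_max=1.0)

with tf.Session() as sess:
    print(sess.run(clipped))  # -> [0.  0.5 1. ]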
Example #1
Source File: preprocessor.py From object_detector_app with MIT License | 6 votes |
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
  """Randomly adjusts saturation.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    min_delta: see max_delta.
    max_delta: how much to change the saturation. Saturation will change with a
               value between min_delta and max_delta. This value will be
               multiplied to the current saturation of the image.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustSaturation', values=[image]):
    image = tf.image.random_saturation(image, min_delta, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #2
Source File: vision_baseline_lstm.py From DOTA_models with Apache License 2.0 | 6 votes |
def visit_count_fc(visit_count, last_visit, embed_neurons, wt_decay, fc_dropout):
  with tf.variable_scope('embed_visit_count'):
    visit_count = tf.reshape(visit_count, shape=[-1])
    last_visit = tf.reshape(last_visit, shape=[-1])
    visit_count = tf.clip_by_value(visit_count, clip_value_min=-1,
                                   clip_value_max=15)
    last_visit = tf.clip_by_value(last_visit, clip_value_min=-1,
                                   clip_value_max=15)
    visit_count = tf.one_hot(visit_count, depth=16, axis=1, dtype=tf.float32,
                             on_value=10., off_value=0.)
    last_visit = tf.one_hot(last_visit, depth=16, axis=1, dtype=tf.float32,
                            on_value=10., off_value=0.)
    f = tf.concat([visit_count, last_visit], 1)
    x, _ = tf_utils.fc_network(
        f, neurons=embed_neurons, wt_decay=wt_decay, name='visit_count_embed',
        offset=0, batch_norm_param=None, dropout_ratio=fc_dropout,
        is_training=is_training)
    return x
Example #3
Source File: ddpg.py From lirpg with MIT License | 6 votes |
def setup_critic_optimizer(self):
    logger.info('setting up critic optimizer')
    normalized_critic_target_tf = tf.clip_by_value(
        normalize(self.critic_target, self.ret_rms),
        self.return_range[0], self.return_range[1])
    self.critic_loss = tf.reduce_mean(
        tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
    if self.critic_l2_reg > 0.:
        critic_reg_vars = [var for var in self.critic.trainable_vars
                           if 'kernel' in var.name and 'output' not in var.name]
        for var in critic_reg_vars:
            logger.info(' regularizing: {}'.format(var.name))
        logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg))
        critic_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(self.critic_l2_reg),
            weights_list=critic_reg_vars
        )
        self.critic_loss += critic_reg
    critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
    critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
    logger.info(' critic shapes: {}'.format(critic_shapes))
    logger.info(' critic params: {}'.format(critic_nb_params))
    self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars,
                                   clip_norm=self.clip_norm)
    self.critic_optimizer = MpiAdam(var_list=self.critic.trainable_vars,
                                    beta1=0.9, beta2=0.999, epsilon=1e-08)
Example #4
Source File: preprocessor.py From DOTA_models with Apache License 2.0 | 6 votes |
def random_adjust_brightness(image, max_delta=0.2):
  """Randomly adjusts brightness.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    max_delta: how much to change the brightness. A value between [0, 1).

  Returns:
    image: image which is the same shape as input image.
    boxes: boxes which is the same shape as input boxes.
  """
  with tf.name_scope('RandomAdjustBrightness', values=[image]):
    image = tf.image.random_brightness(image, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #5
Source File: preprocessor.py From DOTA_models with Apache License 2.0 | 6 votes |
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
  """Randomly adjusts contrast.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    min_delta: see max_delta.
    max_delta: how much to change the contrast. Contrast will change with a
               value between min_delta and max_delta. This value will be
               multiplied to the current contrast of the image.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustContrast', values=[image]):
    image = tf.image.random_contrast(image, min_delta, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #6
Source File: preprocessor.py From DOTA_models with Apache License 2.0 | 6 votes |
def random_adjust_hue(image, max_delta=0.02):
  """Randomly adjusts hue.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    max_delta: change hue randomly with a value between 0 and max_delta.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustHue', values=[image]):
    image = tf.image.random_hue(image, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #7
Source File: preprocessor.py From DOTA_models with Apache License 2.0 | 6 votes |
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
  """Randomly adjusts saturation.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    min_delta: see max_delta.
    max_delta: how much to change the saturation. Saturation will change with a
               value between min_delta and max_delta. This value will be
               multiplied to the current saturation of the image.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustSaturation', values=[image]):
    image = tf.image.random_saturation(image, min_delta, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #8
Source File: preprocessor.py From object_detector_app with MIT License | 6 votes |
def random_adjust_brightness(image, max_delta=0.2):
  """Randomly adjusts brightness.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    max_delta: how much to change the brightness. A value between [0, 1).

  Returns:
    image: image which is the same shape as input image.
    boxes: boxes which is the same shape as input boxes.
  """
  with tf.name_scope('RandomAdjustBrightness', values=[image]):
    image = tf.image.random_brightness(image, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #9
Source File: preprocessor.py From object_detector_app with MIT License | 6 votes |
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
  """Randomly adjusts contrast.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    min_delta: see max_delta.
    max_delta: how much to change the contrast. Contrast will change with a
               value between min_delta and max_delta. This value will be
               multiplied to the current contrast of the image.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustContrast', values=[image]):
    image = tf.image.random_contrast(image, min_delta, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #10
Source File: preprocessor.py From object_detector_app with MIT License | 6 votes |
def random_adjust_hue(image, max_delta=0.02):
  """Randomly adjusts hue.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    max_delta: change hue randomly with a value between 0 and max_delta.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustHue', values=[image]):
    image = tf.image.random_hue(image, max_delta)
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
    return image
Example #11
Source File: inception_resnet_v2.py From DeepLab_v3 with MIT License | 6 votes |
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 35x35 resnet block."""
  with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
    with tf.variable_scope('Branch_0'):
      tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
    with tf.variable_scope('Branch_1'):
      tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
      tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
    with tf.variable_scope('Branch_2'):
      tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
      tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3')
      tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3')
    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2])
    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                     activation_fn=None, scope='Conv2d_1x1')
    scaled_up = up * scale
    if activation_fn == tf.nn.relu6:
      # Use clip_by_value to simulate bandpass activation.
      scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
    net += scaled_up
    if activation_fn:
      net = activation_fn(net)
  return net
Example #12
Source File: inception_resnet_v2.py From DeepLab_v3 with MIT License | 6 votes |
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 17x17 resnet block."""
  with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
    with tf.variable_scope('Branch_0'):
      tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
    with tf.variable_scope('Branch_1'):
      tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
      tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7],
                                  scope='Conv2d_0b_1x7')
      tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1],
                                  scope='Conv2d_0c_7x1')
    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                     activation_fn=None, scope='Conv2d_1x1')
    scaled_up = up * scale
    if activation_fn == tf.nn.relu6:
      # Use clip_by_value to simulate bandpass activation.
      scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
    net += scaled_up
    if activation_fn:
      net = activation_fn(net)
  return net
Example #13
Source File: inception_resnet_v2.py From DeepLab_v3 with MIT License | 6 votes |
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 8x8 resnet block."""
  with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
    with tf.variable_scope('Branch_0'):
      tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
    with tf.variable_scope('Branch_1'):
      tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
      tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3],
                                  scope='Conv2d_0b_1x3')
      tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1],
                                  scope='Conv2d_0c_3x1')
    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                     activation_fn=None, scope='Conv2d_1x1')
    scaled_up = up * scale
    if activation_fn == tf.nn.relu6:
      # Use clip_by_value to simulate bandpass activation.
      scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
    net += scaled_up
    if activation_fn:
      net = activation_fn(net)
  return net
Example #14
Source File: face_attack.py From Adversarial-Face-Attack with GNU General Public License v3.0 | 6 votes |
def build_pgd_attack(self, eps):
    victim_embeddings = tf.constant(self.victim_embeddings, dtype=tf.float32)

    def one_step_attack(image, grad):
        """
        core components of this attack are:
        (a) PGD adversarial attack (https://arxiv.org/pdf/1706.06083.pdf)
        (b) momentum (https://arxiv.org/pdf/1710.06081.pdf)
        (c) input diversity (https://arxiv.org/pdf/1803.06978.pdf)
        """
        orig_image = image
        image = self.structure(image)
        image = (image - 127.5) / 128.0
        image = image + tf.random_uniform(tf.shape(image), minval=-1e-2, maxval=1e-2)
        prelogits, _ = self.network.inference(image, 1.0, False, bottleneck_layer_size=512)
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        embeddings = tf.reshape(embeddings[0], [512, 1])
        objective = tf.reduce_mean(tf.matmul(victim_embeddings, embeddings))  # to be maximized

        noise, = tf.gradients(objective, orig_image)
        noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
        noise = 0.9 * grad + noise

        adv = tf.clip_by_value(orig_image + tf.sign(noise) * 1.0, lower_bound, upper_bound)
        return adv, noise

    input = tf.to_float(self.image_batch)
    lower_bound = tf.clip_by_value(input - eps, 0, 255.)
    upper_bound = tf.clip_by_value(input + eps, 0, 255.)

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        adv, _ = tf.while_loop(
            lambda _, __: True, one_step_attack,
            (input, tf.zeros_like(input)),
            back_prop=False,
            maximum_iterations=100,
            parallel_iterations=1)
    self.adv_image = adv
    return adv
Example #15
Source File: nn.py From cs294-112_hws with MIT License | 6 votes |
def call(self, inputs):
    mean_and_log_std = self.model(inputs)
    mean, log_std = tf.split(mean_and_log_std, num_or_size_splits=2, axis=1)
    log_std = tf.clip_by_value(log_std, -20., 2.)

    distribution = tfp.distributions.MultivariateNormalDiag(
        loc=mean,
        scale_diag=tf.exp(log_std)
    )

    raw_actions = distribution.sample()
    if not self._reparameterize:
        ### Problem 1.3.A
        ### YOUR CODE HERE
        raw_actions = tf.stop_gradient(raw_actions)
    log_probs = distribution.log_prob(raw_actions)
    log_probs -= self._squash_correction(raw_actions)

    ### Problem 2.A
    ### YOUR CODE HERE
    self.actions = tf.tanh(raw_actions)

    return self.actions, log_probs
Example #16
Source File: filters.py From exposure with MIT License | 5 votes |
def process(self, img, param):
    # img = tf.minimum(img, 1.0)
    tone_curve = param
    tone_curve_sum = tf.reduce_sum(tone_curve, axis=4) + 1e-30
    total_image = img * 0
    for i in range(self.cfg.curve_steps):
        total_image += tf.clip_by_value(img - 1.0 * i / self.cfg.curve_steps,
                                        0, 1.0 / self.cfg.curve_steps) \
                       * param[:, :, :, :, i]
    total_image *= self.cfg.curve_steps / tone_curve_sum
    img = total_image
    return img
Example #17
Source File: nn.py From kvae with MIT License | 5 votes |
def dclip(x, min, max):
    return x + tf.stop_gradient(tf.clip_by_value(x, min, max) - x)
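A note on the snippet above: dclip is a "straight-through" clip. In the forward pass it returns the same values as tf.clip_by_value(x, min, max), but because the correction term is wrapped in tf.stop_gradient, the gradient with respect to x is 1 everywhere, so elements in the clipped regions still receive gradient. The tensor values below are a made-up illustration, not code from the kvae project:

x = tf.constant([-3.0, 0.2, 5.0])
y = dclip(x, -1.0, 1.0)                 # forward values: [-1.0, 0.2, 1.0]
g, = tf.gradients(tf.reduce_sum(y), x)  # gradient is 1.0 for every element,
                                        # unlike a plain tf.clip_by_value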
Example #18
Source File: policies.py From evolution-strategies-starter with MIT License | 5 votes |
def _initialize(self, ob_space, ac_space, ac_bins, ac_noise_std, nonlin_type,
                hidden_dims, connection_type):
    self.ac_space = ac_space
    self.ac_bins = ac_bins
    self.ac_noise_std = ac_noise_std
    self.hidden_dims = hidden_dims
    self.connection_type = connection_type

    assert len(ob_space.shape) == len(self.ac_space.shape) == 1
    assert np.all(np.isfinite(self.ac_space.low)) and np.all(np.isfinite(self.ac_space.high)), \
        'Action bounds required'

    self.nonlin = {'tanh': tf.tanh, 'relu': tf.nn.relu, 'lrelu': U.lrelu, 'elu': tf.nn.elu}[nonlin_type]

    with tf.variable_scope(type(self).__name__) as scope:
        # Observation normalization
        ob_mean = tf.get_variable(
            'ob_mean', ob_space.shape, tf.float32, tf.constant_initializer(np.nan), trainable=False)
        ob_std = tf.get_variable(
            'ob_std', ob_space.shape, tf.float32, tf.constant_initializer(np.nan), trainable=False)
        in_mean = tf.placeholder(tf.float32, ob_space.shape)
        in_std = tf.placeholder(tf.float32, ob_space.shape)
        self._set_ob_mean_std = U.function([in_mean, in_std], [], updates=[
            tf.assign(ob_mean, in_mean),
            tf.assign(ob_std, in_std),
        ])

        # Policy network
        o = tf.placeholder(tf.float32, [None] + list(ob_space.shape))
        a = self._make_net(tf.clip_by_value((o - ob_mean) / ob_std, -5.0, 5.0))
        self._act = U.function([o], a)
    return scope
Example #19
Source File: goal_nav_agent.py From streetlearn with Apache License 2.0 | 5 votes |
def _torso(self, input_):
  """Processing of all the visual and language inputs to the LSTM core."""
  # Extract the inputs
  last_action, env_output = input_
  last_reward, _, _, observation = env_output
  frame = observation[self._idx_frame]
  goal = observation[self._idx_goal]
  goal = tf.to_float(goal)

  # Convert to image to floats and normalise.
  frame = tf.to_float(frame)
  frame = snt.FlattenTrailingDimensions(dim_from=3)(frame)
  frame /= 255.0

  # Feed image through convnet.
  with tf.variable_scope('convnet'):
    # Convolutional layers.
    conv_out = self._convnet(frame)
    # Fully connected layer.
    conv_out = snt.BatchFlatten()(conv_out)
    conv_out = snt.Linear(256)(conv_out)
    conv_out = tf.nn.relu(conv_out)

  # Concatenate outputs of the visual and instruction pathways.
  if self._feed_action_and_reward:
    # Append clipped last reward and one hot last action.
    tf.logging.info('Append last reward clipped to: %f', self._max_reward)
    clipped_last_reward = tf.expand_dims(
        tf.clip_by_value(last_reward, -self._max_reward, self._max_reward), -1)
    tf.logging.info('Append last action (one-hot of %d)', self._num_actions)
    one_hot_last_action = tf.one_hot(last_action, self._num_actions)
    tf.logging.info('Append goal:')
    tf.logging.info(goal)
    action_and_reward = tf.concat([clipped_last_reward, one_hot_last_action],
                                  axis=1)
  else:
    action_and_reward = tf.constant([0], dtype=tf.float32)
  return conv_out, action_and_reward, goal
Example #20
Source File: nn.py From kvae with MIT License | 5 votes |
def log_bernoulli(x, p, eps=0.0):
    p = tf.clip_by_value(p, eps, 1.0 - eps)
    return x * tf.log(p) + (1 - x) * tf.log(1 - p)
Example #21
Source File: preprocessor.py From object_detector_app with MIT License | 5 votes |
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
  """Scales each value in the pixels of the image.

  This function scales each pixel independent of the other ones.
  For each value in image tensor, draws a random number between minval and
  maxval and multiples the values with them.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    minval: lower ratio of scaling pixel values.
    maxval: upper ratio of scaling pixel values.
    seed: random seed.

  Returns:
    image: image which is the same shape as input image.
    boxes: boxes which is the same shape as input boxes.
  """
  with tf.name_scope('RandomPixelValueScale', values=[image]):
    color_coef = tf.random_uniform(
        tf.shape(image),
        minval=minval,
        maxval=maxval,
        dtype=tf.float32,
        seed=seed)
    image = tf.multiply(image, color_coef)
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image
Example #22
Source File: filters.py From exposure with MIT License | 5 votes |
def process(self, img, param):
    color_curve = param
    # There will be no division by zero here unless the color filter
    # range lower bound is 0
    color_curve_sum = tf.reduce_sum(param, axis=4) + 1e-30
    total_image = img * 0
    for i in range(self.cfg.curve_steps):
        total_image += tf.clip_by_value(img - 1.0 * i / self.cfg.curve_steps,
                                        0, 1.0 / self.cfg.curve_steps) * \
                       color_curve[:, :, :, :, i]
    total_image *= self.cfg.curve_steps / color_curve_sum
    return total_image
Example #23
Source File: policies.py From HardRLWithYoutube with MIT License | 5 votes |
def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]):
    rms = RunningMeanStd(shape=x.shape[1:])
    norm_x = tf.clip_by_value((x - rms.mean) / rms.std,
                              min(clip_range), max(clip_range))
    return norm_x, rms
Example #24
Source File: mlp_policy.py From lirpg with MIT License | 5 votes |
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
    assert isinstance(ob_space, gym.spaces.Box)

    self.pdtype = pdtype = make_pdtype(ac_space)
    sequence_length = None

    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=[sequence_length] + list(ob_space.shape))

    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)

    with tf.variable_scope('vf'):
        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(tf.layers.dense(last_out, hid_size, name="fc%i" % (i+1),
                                                  kernel_initializer=U.normc_initializer(1.0)))
        self.vpred = tf.layers.dense(last_out, 1, name='final',
                                     kernel_initializer=U.normc_initializer(1.0))[:, 0]

    with tf.variable_scope('pol'):
        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(tf.layers.dense(last_out, hid_size, name='fc%i' % (i+1),
                                                  kernel_initializer=U.normc_initializer(1.0)))
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            mean = tf.layers.dense(last_out, pdtype.param_shape()[0]//2, name='final',
                                   kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2],
                                     initializer=tf.zeros_initializer())
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = tf.layers.dense(last_out, pdtype.param_shape()[0], name='final',
                                      kernel_initializer=U.normc_initializer(0.01))

    self.pd = pdtype.pdfromflat(pdparam)

    self.state_in = []
    self.state_out = []

    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function([stochastic, ob], [ac, self.vpred])
Example #25
Source File: normalizer.py From lirpg with MIT License | 5 votes |
def normalize(self, v, clip_range=None):
    if clip_range is None:
        clip_range = self.default_clip_range
    mean = reshape_for_broadcasting(self.mean, v)
    std = reshape_for_broadcasting(self.std, v)
    return tf.clip_by_value((v - mean) / std, -clip_range, clip_range)
Example #26
Source File: mlp_policy.py From lirpg with MIT License | 5 votes |
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
    assert isinstance(ob_space, gym.spaces.Box)

    self.pdtype = pdtype = make_pdtype(ac_space)
    sequence_length = None

    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=[sequence_length] + list(ob_space.shape))

    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)

    obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
    last_out = obz
    for i in range(num_hid_layers):
        last_out = tf.nn.tanh(dense(last_out, hid_size, "vffc%i" % (i+1),
                                    weight_init=U.normc_initializer(1.0)))
    self.vpred = dense(last_out, 1, "vffinal", weight_init=U.normc_initializer(1.0))[:, 0]

    last_out = obz
    for i in range(num_hid_layers):
        last_out = tf.nn.tanh(dense(last_out, hid_size, "polfc%i" % (i+1),
                                    weight_init=U.normc_initializer(1.0)))

    if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
        mean = dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
        logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2],
                                 initializer=tf.zeros_initializer())
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
    else:
        pdparam = dense(last_out, pdtype.param_shape()[0], "polfinal", U.normc_initializer(0.01))

    self.pd = pdtype.pdfromflat(pdparam)

    self.state_in = []
    self.state_out = []

    # change for BC
    stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self.ac = ac
    self._act = U.function([stochastic, ob], [ac, self.vpred])
Example #27
Source File: common_attention.py From fine-lm with MIT License | 5 votes |
def _generate_relative_positions_matrix(length, max_relative_position):
  """Generates matrix of relative positions between inputs."""
  range_vec = tf.range(length)
  range_mat = tf.reshape(tf.tile(range_vec, [length]), [length, length])
  distance_mat = range_mat - tf.transpose(range_mat)
  distance_mat_clipped = tf.clip_by_value(distance_mat, -max_relative_position,
                                          max_relative_position)
  # Shift values to be >= 0. Each integer still uniquely identifies a relative
  # position difference.
  final_mat = distance_mat_clipped + max_relative_position
  return final_mat
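To see what the snippet above produces, consider a hypothetical call with length=3 and max_relative_position=1 (values chosen purely for illustration, not from the fine-lm code): distance_mat holds j - i for positions i and j, clipping bounds it to [-1, 1], and the shift maps it to non-negative indices usable for an embedding lookup.

# _generate_relative_positions_matrix(length=3, max_relative_position=1)
# distance_mat        clipped             final_mat (clipped + 1)
# [[ 0,  1,  2],      [[ 0,  1,  1],      [[1, 2, 2],
#  [-1,  0,  1],       [-1,  0,  1],       [0, 1, 2],
#  [-2, -1,  0]]       [-1, -1,  0]]       [0, 0, 1]]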
Example #28
Source File: model.py From Neural-LP with MIT License | 5 votes |
def _clip_if_not_None(self, g, v, low, high):
    """ Clip not-None gradients to (low, high). """
    """ Gradient of T is None if T not connected to the objective. """
    if g is not None:
        return (tf.clip_by_value(g, low, high), v)
    else:
        return (g, v)
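A helper like this is typically mapped over the output of compute_gradients so that variables not connected to the objective (whose gradient is None) pass through untouched. The sketch below is a hypothetical usage, not code from Neural-LP; optimizer, loss, and the clip bounds of -5.0/5.0 are assumptions.

grads_and_vars = optimizer.compute_gradients(loss)
# Clip each non-None gradient element-wise; keep (None, var) pairs unchanged.
clipped_gvs = [self._clip_if_not_None(g, v, -5.0, 5.0) for g, v in grads_and_vars]
train_op = optimizer.apply_gradients(clipped_gvs)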
Example #29
Source File: rl.py From fine-lm with MIT License | 5 votes |
def feed_forward_gaussian_fun(action_space, config, observations):
  """Feed-forward Gaussian."""
  if not isinstance(action_space, gym.spaces.box.Box):
    raise ValueError("Expecting continuous action space.")

  mean_weights_initializer = tf.contrib.layers.variance_scaling_initializer(
      factor=config.init_mean_factor)
  logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)

  flat_observations = tf.reshape(observations, [
      tf.shape(observations)[0], tf.shape(observations)[1],
      functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])

  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("policy"):
      x = flat_observations
      for size in config.policy_layers:
        x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
      mean = tf.contrib.layers.fully_connected(
          x, action_space.shape[0], tf.tanh,
          weights_initializer=mean_weights_initializer)
      logstd = tf.get_variable(
          "logstd", mean.shape[2:], tf.float32, logstd_initializer)
      logstd = tf.tile(
          logstd[None, None],
          [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
    with tf.variable_scope("value"):
      x = flat_observations
      for size in config.value_layers:
        x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
      value = tf.contrib.layers.fully_connected(x, 1, None)[..., 0]
  mean = tf.check_numerics(mean, "mean")
  logstd = tf.check_numerics(logstd, "logstd")
  value = tf.check_numerics(value, "value")
  policy = tf.contrib.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))
  return NetworkOutput(policy, value, lambda a: tf.clip_by_value(a, -2., 2))
Example #30
Source File: ppo.py From fine-lm with MIT License | 5 votes |
def define_ppo_step(data_points, optimizer, hparams):
  """Define ppo step."""
  observation, action, discounted_reward, norm_advantage, old_pdf = data_points

  new_policy_dist, new_value, _ = get_policy(observation, hparams)
  new_pdf = new_policy_dist.prob(action)

  ratio = new_pdf / old_pdf
  clipped_ratio = tf.clip_by_value(ratio, 1 - hparams.clipping_coef,
                                   1 + hparams.clipping_coef)

  surrogate_objective = tf.minimum(clipped_ratio * norm_advantage,
                                   ratio * norm_advantage)
  policy_loss = -tf.reduce_mean(surrogate_objective)

  value_error = new_value - discounted_reward
  value_loss = hparams.value_loss_coef * tf.reduce_mean(value_error ** 2)

  entropy = new_policy_dist.entropy()
  entropy_loss = -hparams.entropy_loss_coef * tf.reduce_mean(entropy)

  losses = [policy_loss, value_loss, entropy_loss]

  gradients = [list(zip(*optimizer.compute_gradients(loss))) for loss in losses]

  gradients_norms = [tf.global_norm(gradient[0]) for gradient in gradients]

  gradients_flat = sum([gradient[0] for gradient in gradients], ())
  gradients_variables_flat = sum([gradient[1] for gradient in gradients], ())

  if hparams.max_gradients_norm:
    gradients_flat, _ = tf.clip_by_global_norm(gradients_flat,
                                               hparams.max_gradients_norm)

  optimize_op = optimizer.apply_gradients(zip(gradients_flat,
                                              gradients_variables_flat))

  with tf.control_dependencies([optimize_op]):
    return [tf.identity(x) for x in losses + gradients_norms]