Python tensorflow.exp() Examples
The following are 30 code examples of tensorflow.exp(). You can view the original project or source file by following the link above each example.
You may also want to check out all available functions and classes of the tensorflow module.
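Before the project examples, here is a minimal sketch of what tf.exp computes: the element-wise exponential e**x of a tensor. This snippet is not taken from any of the projects below, and the TF 1.x graph/session pattern is an assumption matching the style of the examples on this page.

import tensorflow as tf

x = tf.constant([0.0, 1.0, 2.0])
y = tf.exp(x)  # element-wise e**x

with tf.Session() as sess:
    print(sess.run(y))  # approximately [1.0, 2.7182817, 7.389056]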
Example #1
Source File: common_layers.py From fine-lm with MIT License | 7 votes |
def get_timing_signal(length, min_timescale=1, max_timescale=1e4, num_timescales=16):
  """Create Tensor of sinusoids of different frequencies.

  Args:
    length: Length of the Tensor to create, i.e. number of steps.
    min_timescale: a float
    max_timescale: a float
    num_timescales: an int
  Returns:
    Tensor of shape (length, 2*num_timescales)
  """
  positions = tf.to_float(tf.range(length))
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
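A hypothetical usage sketch for the helper above (assuming tensorflow as tf and the math module are imported as in the original file): with the default arguments the returned signal has shape (length, 2 * num_timescales).

signal = get_timing_signal(10)  # Tensor of shape (10, 32) with the defaults

with tf.Session() as sess:
    print(sess.run(signal).shape)  # (10, 32)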
Example #2
Source File: distributions.py From DOTA_models with Apache License 2.0 | 6 votes |
def __init__(self, x_bxu, z_size, name, var_min=0.0):
  """Create an input dependent diagonal Gaussian distribution.

  Args:
    x: The input tensor from which the mean and variance are computed,
      via a linear transformation of x.  I.e.
        mu = Wx + b, log(var) = Mx + c
    z_size: The size of the distribution.
    name: The name to prefix to learned variables.
    var_min (optional): Minimal variance allowed.  This is an additional
      way to control the amount of information getting through the
      stochastic layer.
  """
  size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size])
  self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name+"/mean"))
  logvar_bxn = linear(x_bxu, z_size, name=(name+"/logvar"))
  if var_min > 0.0:
    logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min)
  self.logvar_bxn = logvar_bxn

  self.noise_bxn = noise_bxn = tf.random_normal(size_bxn)
  self.noise_bxn.set_shape([None, z_size])
  self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn
Example #3
Source File: competition_model_class.py From Deep_Learning_Weather_Forecasting with Apache License 2.0 | 6 votes |
def minus_plus_std_strategy(self, pred_mean, pred_var, feature_name,
                            timestep_to_ensemble=21, alpha=0):
    '''
    This strategy calculates a linear weighting at a specific timestep
    (timestep_to_ensemble) between prediction and ruitu, as the formula:
        (alpha)*pred_mean + (1-alpha)*ruitu_inputs
    pred_mean: (10, 37, 3)
    pred_var: (10, 37, 3)
    timestep_to_ensemble: int32 (from 0 to 36)
    '''
    print('Using minus_plus_var_strategy with alpha {}'.format(alpha))
    assert 0 <= timestep_to_ensemble <= 36, 'Please ensure 0<=timestep_to_ensemble<=36!'
    assert -0.3 <= alpha <= 0.3, '-0.3<= alpha <=0.3!'
    assert pred_mean.shape == (10, 37, 3), 'Error! This function ONLY works for \
one data sample with shape (10, 37, 3). Any data shape (None, 10, 37, 3) will lead to this error!'
    pred_std = np.sqrt(np.exp(pred_var))
    print('alpha:', alpha)

    pred_mean[:, timestep_to_ensemble:, self.obs_and_output_feature_index_map[feature_name]] = \
        pred_mean[:, timestep_to_ensemble:, self.obs_and_output_feature_index_map[feature_name]] + \
        alpha * pred_std[:, timestep_to_ensemble:, self.obs_and_output_feature_index_map[feature_name]]

    return pred_mean
Example #4
Source File: nn.py From cs294-112_hws with MIT License | 6 votes |
def call(self, inputs):
    mean_and_log_std = self.model(inputs)
    mean, log_std = tf.split(mean_and_log_std, num_or_size_splits=2, axis=1)
    log_std = tf.clip_by_value(log_std, -20., 2.)

    distribution = tfp.distributions.MultivariateNormalDiag(
        loc=mean,
        scale_diag=tf.exp(log_std)
    )

    raw_actions = distribution.sample()
    if not self._reparameterize:
        ### Problem 1.3.A
        ### YOUR CODE HERE
        raw_actions = tf.stop_gradient(raw_actions)
    log_probs = distribution.log_prob(raw_actions)
    log_probs -= self._squash_correction(raw_actions)

    ### Problem 2.A
    ### YOUR CODE HERE
    self.actions = tf.tanh(raw_actions)

    return self.actions, log_probs
Example #5
Source File: distributions.py From DOTA_models with Apache License 2.0 | 6 votes |
def __init__(self, batch_size, z_size, mean, logvar):
  """Create a diagonal gaussian distribution.

  Args:
    batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
    z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
    mean: The N-D mean of the distribution.
    logvar: The N-D log variance of the diagonal distribution.
  """
  size__xz = [None, z_size]

  self.mean = mean          # bxn already
  self.logvar = logvar      # bxn already
  self.noise = noise = tf.random_normal(tf.shape(logvar))
  self.sample = mean + tf.exp(0.5 * logvar) * noise
  mean.set_shape(size__xz)
  logvar.set_shape(size__xz)
  self.sample.set_shape(size__xz)
Example #6
Source File: competition_model_class.py From Deep_Learning_Weather_Forecasting with Apache License 2.0 | 6 votes |
def build_graph(self):
    #keras.backend.clear_session()  # clear session/graph
    self.optimizer = keras.optimizers.Adam(lr=self.lr, decay=self.decay)

    self.model = Seq2Seq_MVE_subnets_swish(id_embd=True, time_embd=True,
                                           lr=self.lr, decay=self.decay,
                                           num_input_features=self.num_input_features,
                                           num_output_features=self.num_output_features,
                                           num_decoder_features=self.num_decoder_features,
                                           layers=self.layers,
                                           loss=self.loss,
                                           regulariser=self.regulariser)

    def _mve_loss(y_true, y_pred):
        pred_u = crop(2, 0, 3)(y_pred)
        pred_sig = crop(2, 3, 6)(y_pred)
        print(pred_sig)
        #exp_sig = tf.exp(pred_sig)  # avoid pred_sig being too small, e.g. zero
        #precision = 1. / exp_sig
        precision = 1. / pred_sig
        #log_loss = 0.5*tf.log(exp_sig) + 0.5*precision*((pred_u - y_true)**2)
        log_loss = 0.5 * tf.log(pred_sig) + 0.5 * precision * ((pred_u - y_true)**2)
        log_loss = tf.reduce_mean(log_loss)
        return log_loss

    print(self.model.summary())
    self.model.compile(optimizer=self.optimizer, loss=_mve_loss)
Example #7
Source File: distributions.py From DOTA_models with Apache License 2.0 | 6 votes |
def diag_gaussian_log_likelihood(z, mu=0.0, logvar=0.0):
  """Log-likelihood under a Gaussian distribution with diagonal covariance.

  Returns the log-likelihood for each dimension.  One should sum the
  results for the log-likelihood under the full multidimensional model.

  Args:
    z: The value to compute the log-likelihood.
    mu: The mean of the Gaussian
    logvar: The log variance of the Gaussian.

  Returns:
    The log-likelihood under the Gaussian model.
  """
  return -0.5 * (logvar + np.log(2*np.pi) +
                 tf.square((z-mu)/tf.exp(0.5*logvar)))
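A hedged usage sketch (values made up): the helper above returns a per-dimension log-likelihood, so summing over the last axis gives the joint log-density of the diagonal Gaussian.

z = tf.constant([[0.5, -1.0]])
ll_per_dim = diag_gaussian_log_likelihood(z, mu=0.0, logvar=0.0)  # shape (1, 2)
joint_ll = tf.reduce_sum(ll_per_dim, axis=-1)                     # shape (1,)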
Example #8
Source File: train_policy.py From cs294-112_hws with MIT License | 6 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from the
    policy distribution

    arguments:
        policy_parameters
            (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (batch_size, self.ac_dim)

    returns:
        sy_sampled_ac: (batch_size, self.ac_dim)
    """
    sy_mean, sy_logstd = policy_parameters
    sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
    return sy_sampled_ac
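A minimal numeric sketch (all values assumed) of the reparameterization used above: a draw from N(mu, sigma^2) is formed as mu + sigma * z with z ~ N(0, I), so only z needs to be sampled.

sy_mean = tf.constant([[0.0, 1.0]])
sy_logstd = tf.constant([[-1.0, 0.5]])
sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)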
Example #9
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def gaussian_kernel_matrix(x, y, sigmas):
  r"""Computes a Gaussian Radial Basis Kernel between the samples of x and y.

  We create a sum of multiple gaussian kernels each having a width sigma_i.

  Args:
    x: a tensor of shape [num_samples, num_features]
    y: a tensor of shape [num_samples, num_features]
    sigmas: a tensor of floats which denote the widths of each of the
      gaussians in the kernel.
  Returns:
    A tensor of shape [num_samples{x}, num_samples{y}] with the RBF kernel.
  """
  beta = 1. / (2. * (tf.expand_dims(sigmas, 1)))

  dist = compute_pairwise_distances(x, y)

  s = tf.matmul(beta, tf.reshape(dist, (1, -1)))

  return tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist))
Example #10
Source File: utility.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def diag_normal_logpdf(mean, logstd, loc):
  """Log density of a normal with diagonal covariance."""
  constant = -0.5 * math.log(2 * math.pi) - logstd
  value = -0.5 * ((loc - mean) / tf.exp(logstd)) ** 2
  return tf.reduce_sum(constant + value, -1)
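A hypothetical call to the helper above (assuming math and tensorflow as tf are imported as in the original file; the numbers are made up): a standard normal (logstd = 0) evaluated at a 2-D point.

mean = tf.constant([0.0, 0.0])
logstd = tf.constant([0.0, 0.0])
loc = tf.constant([1.0, -1.0])
logpdf = diag_normal_logpdf(mean, logstd, loc)  # scalar, approx. -2.8379 (= -log(2*pi) - 1)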
Example #11
Source File: discretization.py From fine-lm with MIT License | 6 votes |
def vae(x, name, z_size):
  """Simple variational autoencoder without discretization.

  Args:
    x: Input to the discretization bottleneck.
    name: Name for the bottleneck scope.
    z_size: Number of bits used to produce discrete code; discrete codes range
      from 1 to 2**z_size.

  Returns:
    Embedding function, latent, loss, mu and log_sigma.
  """
  with tf.variable_scope(name):
    mu = tf.layers.dense(x, z_size, name="mu")
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    shape = common_layers.shape_list(x)
    epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
  return z, kl_loss, mu, log_sigma
Example #12
Source File: density_model.py From cs294-112_hws with MIT License | 6 votes |
def make_encoder(self, state, z_size, scope, n_layers, hid_size):
    """
    ### PROBLEM 3
    ### YOUR CODE HERE

    args:
        state: tf variable
        z_size: output dimension of the encoder network
        scope: scope name
        n_layers: number of layers of the encoder network
        hid_size: hidden dimension of encoder network

    TODO:
        1. z_mean: the output of a neural network that takes the state as input,
            has output dimension z_size, n_layers layers, and hidden dimension hid_size
        2. z_logstd: a trainable variable, initialized to 0
            shape (z_size,)

    Hint: use build_mlp
    """
    z_mean = build_mlp(state, z_size, scope, n_layers, hid_size)
    z_logstd = tf.get_variable('z_logstd', shape=z_size, trainable=True,
                               initializer=tf.constant_initializer(value=0.))
    return tfp.distributions.MultivariateNormalDiag(loc=z_mean,
                                                    scale_diag=tf.exp(z_logstd))
Example #13
Source File: learning_rate.py From fine-lm with MIT License | 6 votes |
def _learning_rate_warmup(warmup_steps, warmup_schedule="exp", hparams=None):
  """Learning rate warmup multiplier."""
  if not warmup_steps:
    return tf.constant(1.)

  tf.logging.info("Applying %s learning rate warmup for %d steps",
                  warmup_schedule, warmup_steps)

  warmup_steps = tf.to_float(warmup_steps)
  global_step = _global_step(hparams)

  if warmup_schedule == "exp":
    return tf.exp(tf.log(0.01) / warmup_steps)**(warmup_steps - global_step)
  else:
    assert warmup_schedule == "linear"
    start = tf.constant(0.35)
    return ((tf.constant(1.) - start) / warmup_steps) * global_step + start
Example #14
Source File: autoencoders.py From fine-lm with MIT License | 6 votes |
def bottleneck(self, x):  # pylint: disable=arguments-differ
  hparams = self.hparams
  if hparams.unordered:
    return super(AutoencoderOrderedDiscrete, self).bottleneck(x)
  noise = hparams.bottleneck_noise
  hparams.bottleneck_noise = 0.0  # We'll add noise below.
  x, loss = discretization.parametrized_bottleneck(x, hparams)
  hparams.bottleneck_noise = noise
  if hparams.mode == tf.estimator.ModeKeys.TRAIN:
    # We want a number p such that p^bottleneck_bits = 1 - noise.
    # So log(p) * bottleneck_bits = log(noise)
    log_p = tf.log(1 - float(noise) / 2) / float(hparams.bottleneck_bits)
    # Probabilities of flipping are p, p^2, p^3, ..., p^bottleneck_bits.
    noise_mask = 1.0 - tf.exp(tf.cumsum(tf.zeros_like(x) + log_p, axis=-1))
    # Having the no-noise mask, we can make noise just uniformly at random.
    ordered_noise = tf.random_uniform(tf.shape(x))
    # We want our noise to be 1s at the start and random {-1, 1} bits later.
    ordered_noise = tf.to_float(tf.less(noise_mask, ordered_noise))
    # Now we flip the bits of x on the noisy positions (ordered and normal).
    x *= 2.0 * ordered_noise - 1
  return x, loss
Example #15
Source File: yellowfin.py From fine-lm with MIT License | 5 votes |
def _curvature_range(self):
  """Curvature range.

  Returns:
    h_max_t, h_min_t ops
  """
  self._curv_win = tf.get_variable("curv_win",
                                   dtype=tf.float32,
                                   trainable=False,
                                   shape=[self.curvature_window_width,],
                                   initializer=tf.zeros_initializer)
  # We use log smoothing for curvature range
  self._curv_win = tf.scatter_update(self._curv_win,
                                     self._step % self.curvature_window_width,
                                     tf.log(self._grad_norm_squared))
  # Note here the iterations start from iteration 0
  valid_window = tf.slice(self._curv_win,
                          tf.constant([0,]),
                          tf.expand_dims(
                              tf.minimum(
                                  tf.constant(self.curvature_window_width),
                                  self._step + 1), dim=0))
  self._h_min_t = tf.reduce_min(valid_window)
  self._h_max_t = tf.reduce_max(valid_window)

  curv_range_ops = []
  with tf.control_dependencies([self._h_min_t, self._h_max_t]):
    avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
    with tf.control_dependencies([avg_op]):
      self._h_min = tf.exp(
          tf.identity(self._moving_averager.average(self._h_min_t)))
      self._h_max = tf.exp(
          tf.identity(self._moving_averager.average(self._h_max_t)))
      if self._sparsity_debias:
        self._h_min *= self._sparsity_avg
        self._h_max *= self._sparsity_avg
  curv_range_ops.append(avg_op)
  return curv_range_ops  # h_max_t, h_min_t
Example #16
Source File: policies.py From lirpg with MIT License | 5 votes |
def __init__(self, ob_dim, ac_dim):
    # Here we'll construct a bunch of expressions, which will be used in two places:
    # (1) When sampling actions
    # (2) When computing loss functions, for the policy update
    # Variables specific to (1) have the word "sampled" in them,
    # whereas variables specific to (2) have the word "old" in them
    ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob")  # batch of observations
    oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac")  # batch of previous actions
    oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist")  # batch of previous action distributions
    adv_n = tf.placeholder(tf.float32, shape=[None], name="adv")  # advantage function estimate
    wd_dict = {}
    h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict)  # Mean control output
    self.wd_dict = wd_dict
    self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer())  # Variance on outputs
    logstd_1a = tf.expand_dims(logstd_1a, 0)
    std_1a = tf.exp(logstd_1a)
    std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
    ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
    sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:, ac_dim:])) * ac_dist[:, ac_dim:] + ac_dist[:, :ac_dim]  # This is the sampled action we'll perform.
    logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1)  # Logprob of sampled action
    logprob_n = - tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - oldac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1)  # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
    kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
    #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n))  # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
    surr = - tf.reduce_mean(adv_n * logprob_n)  # Loss function that we'll differentiate to get the policy gradient
    surr_sampled = - tf.reduce_mean(logprob_n)  # Sampled loss of the policy
    self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n])  # Generate a new action and its logprob
    #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl)  # Compute (approximate) KL divergence between old policy and new policy
    self.compute_kl = U.function([ob_no, oldac_dist], kl)
    self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled)  # Input and output variables needed for computing loss
    U.initialize()  # Initialize uninitialized TF variables
Example #17
Source File: distributions.py From lirpg with MIT License | 5 votes |
def kl(self, other):
    a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
    a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
    ea0 = tf.exp(a0)
    ea1 = tf.exp(a1)
    z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
    z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
    p0 = ea0 / z0
    return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)
Example #18
Source File: distributions.py From lirpg with MIT License | 5 votes |
def __init__(self, flat):
    self.flat = flat
    mean, logstd = tf.split(axis=len(flat.shape)-1, num_or_size_splits=2, value=flat)
    self.mean = mean
    self.logstd = logstd
    self.std = tf.exp(logstd)
Example #19
Source File: distributions.py From lirpg with MIT License | 5 votes |
def entropy(self):
    a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
    ea0 = tf.exp(a0)
    z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
    p0 = ea0 / z0
    return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)
Example #20
Source File: ops.py From Generative-Latent-Optimization-Tensorflow with MIT License | 5 votes |
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x > 0.0, x, alpha * tf.exp(x) - alpha)
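A brief sketch (values assumed) of applying the selu activation defined above: for positive inputs it is linear scaled by `scale`, while negative inputs follow the scaled exponential branch.

x = tf.constant([[-1.0, 0.0, 2.0]])
y = selu(x)  # approximately [[-1.1113, 0.0, 2.1014]]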
Example #21
Source File: next_frame.py From fine-lm with MIT License | 5 votes |
def get_scheduled_sample_inputs(
    self, done_warm_start, groundtruth_items, generated_items, batch_size):

  with tf.variable_scope("scheduled_sampling", reuse=tf.AUTO_REUSE):
    if self.hparams.mode != tf.estimator.ModeKeys.TRAIN:
      feedself = True
    else:
      # Scheduled sampling:
      # Calculate number of ground-truth frames to pass in.
      feedself = False
      iter_num = tf.train.get_global_step()
      # TODO(mbz): what should it be if it's undefined?
      if iter_num is None:
        iter_num = _LARGE_STEP_NUMBER
      k = self.hparams.scheduled_sampling_k
      num_ground_truth = tf.to_int32(
          tf.round(
              tf.to_float(batch_size) *
              (k / (k + tf.exp(tf.to_float(iter_num) / tf.to_float(k))))))

    if feedself and done_warm_start:
      # Feed in generated stuff.
      output_items = generated_items
    elif done_warm_start:
      output_items = []
      for item_gt, item_gen in zip(groundtruth_items, generated_items):
        # Scheduled sampling
        output_items.append(self.scheduled_sample(
            item_gt, item_gen, batch_size, num_ground_truth))
    else:
      # Feed in ground_truth
      output_items = groundtruth_items

    return output_items

# TODO(mbz): use tf.distributions.kl_divergence instead.
Example #22
Source File: op.py From ArtGAN with BSD 3-Clause "New" or "Revised" License | 5 votes |
def log_sum_exp(x, axis=1):
    m = tf.reduce_max(x, axis=axis, keep_dims=True)
    return m + tf.log(tf.reduce_sum(tf.exp(x - m), axis=axis))
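A hedged sanity check (assumed values): the helper above subtracts the row-wise maximum before exponentiating, which avoids overflow but should agree with the naive formulation for moderate inputs.

x = tf.constant([[1.0, 2.0, 3.0]])
stable = log_sum_exp(x, axis=1)                                   # shape (1, 1)
naive = tf.log(tf.reduce_sum(tf.exp(x), axis=1, keep_dims=True))  # approx. [[3.4076]]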
Example #23
Source File: train_ac_exploration_f18.py From cs294-112_hws with MIT License | 5 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

            mu + sigma * z,         z ~ N(0, I)

        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, num_samples=1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
    return sy_sampled_ac
Example #24
Source File: density_model.py From cs294-112_hws with MIT License | 5 votes |
def make_prior(self, z_size):
    """
    ### PROBLEM 3
    ### YOUR CODE HERE

    args:
        z_size: output dimension of the encoder network

    TODO:
        prior_mean and prior_logstd are for a standard normal distribution
            both have dimension z_size
    """
    prior_mean = tf.zeros(z_size)
    prior_logstd = tf.zeros(z_size)
    return tfp.distributions.MultivariateNormalDiag(loc=prior_mean,
                                                    scale_diag=tf.exp(prior_logstd))
Example #25
Source File: train_policy.py From cs294-112_hws with MIT License | 5 votes |
def ppo_loss(self, log_probs, fixed_log_probs, advantages, clip_epsilon=0.1, entropy_coeff=1e-4):
    """
    given:
        clip_epsilon

    arguments:
        advantages (mini_bsize,)
        states (mini_bsize,)
        actions (mini_bsize,)
        fixed_log_probs (mini_bsize,)

    intermediate results:
        states, actions --> log_probs
        log_probs, fixed_log_probs --> ratio
        advantages, ratio --> surr1
        ratio, clip_epsilon, advantages --> surr2
        surr1, surr2 --> policy_surr_loss
    """
    ratio = tf.exp(log_probs - fixed_log_probs)
    surr1 = ratio * advantages
    surr2 = tf.clip_by_value(ratio, clip_value_min=1.0-clip_epsilon, clip_value_max=1.0+clip_epsilon) * advantages
    policy_surr_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))

    probs = tf.exp(log_probs)
    entropy = tf.reduce_sum(-(log_probs * probs))
    policy_surr_loss -= entropy_coeff * entropy

    return policy_surr_loss
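A small numeric sketch (all values assumed) of the clipped surrogate used above: the probability ratio comes from exponentiating the log-probability difference, and the pessimistic minimum of the clipped and unclipped terms is taken.

log_probs = tf.constant([0.1, -0.2])
fixed_log_probs = tf.constant([0.0, 0.0])
advantages = tf.constant([1.0, -1.0])

ratio = tf.exp(log_probs - fixed_log_probs)             # approx. [1.105, 0.819]
surr1 = ratio * advantages
surr2 = tf.clip_by_value(ratio, 0.9, 1.1) * advantages  # ratio clipped to [1-eps, 1+eps]
policy_surr_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))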
Example #26
Source File: train_pg_f18.py From cs294-112_hws with MIT License | 5 votes |
def get_neg_log_prob(self, policy_parameters, sy_ac_na):
    """
    Constructs a symbolic operation for computing the negative log probability
    of a set of actions that were actually taken according to the policy

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

        sy_ac_na:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    returns:
        sy_neg_logprob_n: (batch_size)

    Hint:
        For the discrete case, use the log probability under a categorical distribution.
        For the continuous case, use the log probability under a multivariate gaussian.
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_CODE_HERE
        sy_neg_logprob_n = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=sy_ac_na,
            logits=sy_logits_na
        )
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_CODE_HERE
        sy = (sy_ac_na - sy_mean) / tf.exp(sy_logstd)
        sy_neg_logprob_n = 0.5 * tf.reduce_sum(sy * sy, axis=1)
    return sy_neg_logprob_n
Example #27
Source File: train_pg_f18.py From cs294-112_hws with MIT License | 5 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

            mu + sigma * z,         z ~ N(0, I)

        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_CODE_HERE
        sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_CODE_HERE
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
    return sy_sampled_ac

#========================================================================================#
#                           ----------PROBLEM 2----------
#========================================================================================#
Example #28
Source File: train_ac_f18.py From cs294-112_hws with MIT License | 5 votes |
def get_log_prob(self, policy_parameters, sy_ac_na):
    """
    Constructs a symbolic operation for computing the log probability of a set of
    actions that were actually taken according to the policy

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

        sy_ac_na: (batch_size, self.ac_dim)

    returns:
        sy_logprob_n: (batch_size)

    Hint:
        For the discrete case, use the log probability under a categorical distribution.
        For the continuous case, use the log probability under a multivariate gaussian.
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy_logprob_n = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=sy_ac_na,
            logits=sy_logits_na
        )
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy = (sy_ac_na - sy_mean) / tf.exp(sy_logstd)
        sy_logprob_n = -0.5 * tf.reduce_sum(sy * sy, axis=1)
    return sy_logprob_n
Example #29
Source File: train_ac_f18.py From cs294-112_hws with MIT License | 5 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

            mu + sigma * z,         z ~ N(0, I)

        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
    return sy_sampled_ac
Example #30
Source File: ops.py From SSGAN-Tensorflow with MIT License | 5 votes |
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x > 0.0, x, alpha * tf.exp(x) - alpha)