Python tensorflow.exp() Examples
The following are 30 code examples of tensorflow.exp(). You can view the original project or source file by following the link above each example.
You may also want to check out all available functions and classes of the tensorflow module.
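Before the project examples, here is a minimal sketch of what tf.exp computes: the element-wise exponential e**x of a tensor. This snippet is not taken from any of the projects below, and the TF 1.x graph/session pattern is an assumption matching the style of the examples on this page.

import tensorflow as tf

x = tf.constant([0.0, 1.0, 2.0])
y = tf.exp(x)  # element-wise e**x

with tf.Session() as sess:
    print(sess.run(y))  # approximately [1.0, 2.7182817, 7.389056]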
Example #1
Source File: common_layers.py From fine-lm with MIT License | 7 votes |
def get_timing_signal(length, min_timescale=1, max_timescale=1e4, num_timescales=16):
  """Create Tensor of sinusoids of different frequencies.

  Args:
    length: Length of the Tensor to create, i.e. number of steps.
    min_timescale: a float
    max_timescale: a float
    num_timescales: an int
  Returns:
    Tensor of shape (length, 2*num_timescales)
  """
  positions = tf.to_float(tf.range(length))
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
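A hypothetical usage sketch for the helper above (assuming tensorflow as tf and the math module are imported as in the original file): with the default arguments the returned signal has shape (length, 2 * num_timescales).

signal = get_timing_signal(10)  # Tensor of shape (10, 32) with the defaults

with tf.Session() as sess:
    print(sess.run(signal).shape)  # (10, 32)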
Example #2
Source File: distributions.py From DOTA_models with Apache License 2.0 | 6 votes |
def __init__(self, x_bxu, z_size, name, var_min=0.0):
  """Create an input dependent diagonal Gaussian distribution.

  Args:
    x: The input tensor from which the mean and variance are computed,
      via a linear transformation of x.  I.e.
        mu = Wx + b, log(var) = Mx + c
    z_size: The size of the distribution.
    name: The name to prefix to learned variables.
    var_min (optional): Minimal variance allowed.  This is an additional
      way to control the amount of information getting through the
      stochastic layer.
  """
  size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size])
  self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name+"/mean"))
  logvar_bxn = linear(x_bxu, z_size, name=(name+"/logvar"))
  if var_min > 0.0:
    logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min)
  self.logvar_bxn = logvar_bxn

  self.noise_bxn = noise_bxn = tf.random_normal(size_bxn)
  self.noise_bxn.set_shape([None, z_size])
  self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn
Example #3
Source File: competition_model_class.py From Deep_Learning_Weather_Forecasting with Apache License 2.0 | 6 votes |
def minus_plus_std_strategy(self, pred_mean, pred_var, feature_name,
                            timestep_to_ensemble=21, alpha=0):
    '''
    This strategy calculates a linear weighting at a specific timestep
    (timestep_to_ensemble) between prediction and ruitu, as the formula:
        (alpha)*pred_mean + (1-alpha)*ruitu_inputs
    pred_mean: (10, 37, 3)
    pred_var: (10, 37, 3)
    timestep_to_ensemble: int32 (from 0 to 36)
    '''
    print('Using minus_plus_var_strategy with alpha {}'.format(alpha))
    assert 0 <= timestep_to_ensemble <= 36, 'Please ensure 0<=timestep_to_ensemble<=36!'
    assert -0.3 <= alpha <= 0.3, '-0.3<= alpha <=0.3!'
    assert pred_mean.shape == (10, 37, 3), 'Error! This function ONLY works for \
one data sample with shape (10, 37, 3). Any data shape (None, 10, 37, 3) will lead to this error!'
    pred_std = np.sqrt(np.exp(pred_var))
    print('alpha:', alpha)

    pred_mean[:, timestep_to_ensemble:, self.obs_and_output_feature_index_map[feature_name]] = \
        pred_mean[:, timestep_to_ensemble:, self.obs_and_output_feature_index_map[feature_name]] + \
        alpha * pred_std[:, timestep_to_ensemble:, self.obs_and_output_feature_index_map[feature_name]]

    return pred_mean
Example #4
Source File: nn.py From cs294-112_hws with MIT License | 6 votes |
def call(self, inputs):
    mean_and_log_std = self.model(inputs)
    mean, log_std = tf.split(mean_and_log_std, num_or_size_splits=2, axis=1)
    log_std = tf.clip_by_value(log_std, -20., 2.)

    distribution = tfp.distributions.MultivariateNormalDiag(
        loc=mean,
        scale_diag=tf.exp(log_std)
    )

    raw_actions = distribution.sample()
    if not self._reparameterize:
        ### Problem 1.3.A
        ### YOUR CODE HERE
        raw_actions = tf.stop_gradient(raw_actions)
    log_probs = distribution.log_prob(raw_actions)
    log_probs -= self._squash_correction(raw_actions)

    ### Problem 2.A
    ### YOUR CODE HERE
    self.actions = tf.tanh(raw_actions)

    return self.actions, log_probs
Example #5
Source File: distributions.py From DOTA_models with Apache License 2.0 | 6 votes |
def __init__(self, batch_size, z_size, mean, logvar):
  """Create a diagonal gaussian distribution.

  Args:
    batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
    z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
    mean: The N-D mean of the distribution.
    logvar: The N-D log variance of the diagonal distribution.
  """
  size__xz = [None, z_size]

  self.mean = mean          # bxn already
  self.logvar = logvar      # bxn already
  self.noise = noise = tf.random_normal(tf.shape(logvar))
  self.sample = mean + tf.exp(0.5 * logvar) * noise
  mean.set_shape(size__xz)
  logvar.set_shape(size__xz)
  self.sample.set_shape(size__xz)
Example #6
Source File: competition_model_class.py From Deep_Learning_Weather_Forecasting with Apache License 2.0 | 6 votes |
def build_graph(self):
    #keras.backend.clear_session()  # clear session/graph
    self.optimizer = keras.optimizers.Adam(lr=self.lr, decay=self.decay)

    self.model = Seq2Seq_MVE_subnets_swish(id_embd=True, time_embd=True,
                                           lr=self.lr, decay=self.decay,
                                           num_input_features=self.num_input_features,
                                           num_output_features=self.num_output_features,
                                           num_decoder_features=self.num_decoder_features,
                                           layers=self.layers,
                                           loss=self.loss,
                                           regulariser=self.regulariser)

    def _mve_loss(y_true, y_pred):
        pred_u = crop(2, 0, 3)(y_pred)
        pred_sig = crop(2, 3, 6)(y_pred)
        print(pred_sig)
        #exp_sig = tf.exp(pred_sig)  # avoid pred_sig being too small, e.g. zero
        #precision = 1. / exp_sig
        precision = 1. / pred_sig
        #log_loss = 0.5*tf.log(exp_sig) + 0.5*precision*((pred_u - y_true)**2)
        log_loss = 0.5 * tf.log(pred_sig) + 0.5 * precision * ((pred_u - y_true)**2)
        log_loss = tf.reduce_mean(log_loss)
        return log_loss

    print(self.model.summary())
    self.model.compile(optimizer=self.optimizer, loss=_mve_loss)
Example #7
Source File: distributions.py From DOTA_models with Apache License 2.0 | 6 votes |
def diag_gaussian_log_likelihood(z, mu=0.0, logvar=0.0):
  """Log-likelihood under a Gaussian distribution with diagonal covariance.

  Returns the log-likelihood for each dimension.  One should sum the
  results for the log-likelihood under the full multidimensional model.

  Args:
    z: The value to compute the log-likelihood.
    mu: The mean of the Gaussian
    logvar: The log variance of the Gaussian.

  Returns:
    The log-likelihood under the Gaussian model.
  """
  return -0.5 * (logvar + np.log(2*np.pi) +
                 tf.square((z-mu)/tf.exp(0.5*logvar)))
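A hedged usage sketch (values made up): the helper above returns a per-dimension log-likelihood, so summing over the last axis gives the joint log-density of the diagonal Gaussian.

z = tf.constant([[0.5, -1.0]])
ll_per_dim = diag_gaussian_log_likelihood(z, mu=0.0, logvar=0.0)  # shape (1, 2)
joint_ll = tf.reduce_sum(ll_per_dim, axis=-1)                     # shape (1,)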
Example #8
Source File: train_policy.py From cs294-112_hws with MIT License | 6 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from the
    policy distribution

    arguments:
        policy_parameters
            (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (batch_size, self.ac_dim)

    returns:
        sy_sampled_ac: (batch_size, self.ac_dim)
    """
    sy_mean, sy_logstd = policy_parameters
    sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
    return sy_sampled_ac
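A minimal numeric sketch (all values assumed) of the reparameterization used above: a draw from N(mu, sigma^2) is formed as mu + sigma * z with z ~ N(0, I), so only z needs to be sampled.

sy_mean = tf.constant([[0.0, 1.0]])
sy_logstd = tf.constant([[-1.0, 0.5]])
sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)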
Example #9
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def gaussian_kernel_matrix(x, y, sigmas):
  r"""Computes a Gaussian Radial Basis Kernel between the samples of x and y.

  We create a sum of multiple gaussian kernels each having a width sigma_i.

  Args:
    x: a tensor of shape [num_samples, num_features]
    y: a tensor of shape [num_samples, num_features]
    sigmas: a tensor of floats which denote the widths of each of the
      gaussians in the kernel.
  Returns:
    A tensor of shape [num_samples{x}, num_samples{y}] with the RBF kernel.
  """
  beta = 1. / (2. * (tf.expand_dims(sigmas, 1)))

  dist = compute_pairwise_distances(x, y)

  s = tf.matmul(beta, tf.reshape(dist, (1, -1)))

  return tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist))
Example #10
Source File: utility.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def diag_normal_logpdf(mean, logstd, loc):
  """Log density of a normal with diagonal covariance."""
  constant = -0.5 * math.log(2 * math.pi) - logstd
  value = -0.5 * ((loc - mean) / tf.exp(logstd)) ** 2
  return tf.reduce_sum(constant + value, -1)
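A hypothetical call to the helper above (assuming math and tensorflow as tf are imported as in the original file; the numbers are made up): a standard normal (logstd = 0) evaluated at a 2-D point.

mean = tf.constant([0.0, 0.0])
logstd = tf.constant([0.0, 0.0])
loc = tf.constant([1.0, -1.0])
logpdf = diag_normal_logpdf(mean, logstd, loc)  # scalar, approx. -2.8379 (= -log(2*pi) - 1)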
Example #11
Source File: discretization.py From fine-lm with MIT License | 6 votes |
def vae(x, name, z_size):
  """Simple variational autoencoder without discretization.

  Args:
    x: Input to the discretization bottleneck.
    name: Name for the bottleneck scope.
    z_size: Number of bits used to produce discrete code; discrete codes range
      from 1 to 2**z_size.

  Returns:
    Embedding function, latent, loss, mu and log_sigma.
  """
  with tf.variable_scope(name):
    mu = tf.layers.dense(x, z_size, name="mu")
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    shape = common_layers.shape_list(x)
    epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
  return z, kl_loss, mu, log_sigma
Example #12
Source File: density_model.py From cs294-112_hws with MIT License | 6 votes |
def make_encoder(self, state, z_size, scope, n_layers, hid_size):
    """
    ### PROBLEM 3
    ### YOUR CODE HERE

    args:
        state: tf variable
        z_size: output dimension of the encoder network
        scope: scope name
        n_layers: number of layers of the encoder network
        hid_size: hidden dimension of encoder network

    TODO:
        1. z_mean: the output of a neural network that takes the state as input,
            has output dimension z_size, n_layers layers, and hidden dimension hid_size
        2. z_logstd: a trainable variable, initialized to 0
            shape (z_size,)

    Hint: use build_mlp
    """
    z_mean = build_mlp(state, z_size, scope, n_layers, hid_size)
    z_logstd = tf.get_variable('z_logstd', shape=z_size, trainable=True,
                               initializer=tf.constant_initializer(value=0.))
    return tfp.distributions.MultivariateNormalDiag(loc=z_mean,
                                                    scale_diag=tf.exp(z_logstd))
Example #13
Source File: learning_rate.py From fine-lm with MIT License | 6 votes |
def _learning_rate_warmup(warmup_steps, warmup_schedule="exp", hparams=None):
  """Learning rate warmup multiplier."""
  if not warmup_steps:
    return tf.constant(1.)

  tf.logging.info("Applying %s learning rate warmup for %d steps",
                  warmup_schedule, warmup_steps)

  warmup_steps = tf.to_float(warmup_steps)
  global_step = _global_step(hparams)

  if warmup_schedule == "exp":
    return tf.exp(tf.log(0.01) / warmup_steps)**(warmup_steps - global_step)
  else:
    assert warmup_schedule == "linear"
    start = tf.constant(0.35)
    return ((tf.constant(1.) - start) / warmup_steps) * global_step + start
Example #14
Source File: autoencoders.py From fine-lm with MIT License | 6 votes |
def bottleneck(self, x):  # pylint: disable=arguments-differ
  hparams = self.hparams
  if hparams.unordered:
    return super(AutoencoderOrderedDiscrete, self).bottleneck(x)
  noise = hparams.bottleneck_noise
  hparams.bottleneck_noise = 0.0  # We'll add noise below.
  x, loss = discretization.parametrized_bottleneck(x, hparams)
  hparams.bottleneck_noise = noise
  if hparams.mode == tf.estimator.ModeKeys.TRAIN:
    # We want a number p such that p^bottleneck_bits = 1 - noise.
    # So log(p) * bottleneck_bits = log(noise)
    log_p = tf.log(1 - float(noise) / 2) / float(hparams.bottleneck_bits)
    # Probabilities of flipping are p, p^2, p^3, ..., p^bottleneck_bits.
    noise_mask = 1.0 - tf.exp(tf.cumsum(tf.zeros_like(x) + log_p, axis=-1))
    # Having the no-noise mask, we can make noise just uniformly at random.
    ordered_noise = tf.random_uniform(tf.shape(x))
    # We want our noise to be 1s at the start and random {-1, 1} bits later.
    ordered_noise = tf.to_float(tf.less(noise_mask, ordered_noise))
    # Now we flip the bits of x on the noisy positions (ordered and normal).
    x *= 2.0 * ordered_noise - 1
  return x, loss
Example #15
Source File: yellowfin.py From fine-lm with MIT License | 5 votes |
def _curvature_range(self):
  """Curvature range.

  Returns:
    h_max_t, h_min_t ops
  """
  self._curv_win = tf.get_variable("curv_win",
                                   dtype=tf.float32,
                                   trainable=False,
                                   shape=[self.curvature_window_width,],
                                   initializer=tf.zeros_initializer)
  # We use log smoothing for curvature range
  self._curv_win = tf.scatter_update(self._curv_win,
                                     self._step % self.curvature_window_width,
                                     tf.log(self._grad_norm_squared))
  # Note here the iterations start from iteration 0
  valid_window = tf.slice(self._curv_win,
                          tf.constant([0,]),
                          tf.expand_dims(
                              tf.minimum(
                                  tf.constant(self.curvature_window_width),
                                  self._step + 1), dim=0))
  self._h_min_t = tf.reduce_min(valid_window)
  self._h_max_t = tf.reduce_max(valid_window)

  curv_range_ops = []
  with tf.control_dependencies([self._h_min_t, self._h_max_t]):
    avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
    with tf.control_dependencies([avg_op]):
      self._h_min = tf.exp(
          tf.identity(self._moving_averager.average(self._h_min_t)))
      self._h_max = tf.exp(
          tf.identity(self._moving_averager.average(self._h_max_t)))
      if self._sparsity_debias:
        self._h_min *= self._sparsity_avg
        self._h_max *= self._sparsity_avg
  curv_range_ops.append(avg_op)
  return curv_range_ops  # h_max_t, h_min_t
Example #16
Source File: policies.py From lirpg with MIT License | 5 votes |
def __init__(self, ob_dim, ac_dim):
    # Here we'll construct a bunch of expressions, which will be used in two places:
    # (1) When sampling actions
    # (2) When computing loss functions, for the policy update
    # Variables specific to (1) have the word "sampled" in them,
    # whereas variables specific to (2) have the word "old" in them
    ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob")  # batch of observations
    oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac")  # batch of previous actions
    oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist")  # batch of previous action distributions
    adv_n = tf.placeholder(tf.float32, shape=[None], name="adv")  # advantage function estimate
    wd_dict = {}
    h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict)  # Mean control output
    self.wd_dict = wd_dict
    self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer())  # Variance on outputs
    logstd_1a = tf.expand_dims(logstd_1a, 0)
    std_1a = tf.exp(logstd_1a)
    std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
    ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
    sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:, ac_dim:])) * ac_dist[:, ac_dim:] + ac_dist[:, :ac_dim]  # This is the sampled action we'll perform.
    logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1)  # Logprob of sampled action
    logprob_n = - tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - oldac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1)  # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
    kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
    #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n))  # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
    surr = - tf.reduce_mean(adv_n * logprob_n)  # Loss function that we'll differentiate to get the policy gradient
    surr_sampled = - tf.reduce_mean(logprob_n)  # Sampled loss of the policy
    self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n])  # Generate a new action and its logprob
    #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl)  # Compute (approximate) KL divergence between old policy and new policy
    self.compute_kl = U.function([ob_no, oldac_dist], kl)
    self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled)  # Input and output variables needed for computing loss
    U.initialize()  # Initialize uninitialized TF variables
Example #17
Source File: distributions.py From lirpg with MIT License | 5 votes |
def kl(self, other):
    a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
    a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
    ea0 = tf.exp(a0)
    ea1 = tf.exp(a1)
    z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
    z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
    p0 = ea0 / z0
    return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)
Example #18
Source File: distributions.py From lirpg with MIT License | 5 votes |
def __init__(self, flat):
    self.flat = flat
    mean, logstd = tf.split(axis=len(flat.shape)-1, num_or_size_splits=2, value=flat)
    self.mean = mean
    self.logstd = logstd
    self.std = tf.exp(logstd)
Example #19
Source File: distributions.py From lirpg with MIT License | 5 votes |
def entropy(self):
    a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
    ea0 = tf.exp(a0)
    z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
    p0 = ea0 / z0
    return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)
Example #20
Source File: ops.py From Generative-Latent-Optimization-Tensorflow with MIT License | 5 votes |
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x > 0.0, x, alpha * tf.exp(x) - alpha)
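A brief sketch (values assumed) of applying the selu activation defined above: for positive inputs it is linear scaled by `scale`, while negative inputs follow the scaled exponential branch.

x = tf.constant([[-1.0, 0.0, 2.0]])
y = selu(x)  # approximately [[-1.1113, 0.0, 2.1014]]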
Example #21
Source File: next_frame.py From fine-lm with MIT License | 5 votes |
def get_scheduled_sample_inputs(
    self, done_warm_start, groundtruth_items, generated_items, batch_size):

  with tf.variable_scope("scheduled_sampling", reuse=tf.AUTO_REUSE):
    if self.hparams.mode != tf.estimator.ModeKeys.TRAIN:
      feedself = True
    else:
      # Scheduled sampling:
      # Calculate number of ground-truth frames to pass in.
      feedself = False
      iter_num = tf.train.get_global_step()
      # TODO(mbz): what should it be if it's undefined?
      if iter_num is None:
        iter_num = _LARGE_STEP_NUMBER
      k = self.hparams.scheduled_sampling_k
      num_ground_truth = tf.to_int32(
          tf.round(
              tf.to_float(batch_size) *
              (k / (k + tf.exp(tf.to_float(iter_num) / tf.to_float(k))))))

    if feedself and done_warm_start:
      # Feed in generated stuff.
      output_items = generated_items
    elif done_warm_start:
      output_items = []
      for item_gt, item_gen in zip(groundtruth_items, generated_items):
        # Scheduled sampling
        output_items.append(self.scheduled_sample(
            item_gt, item_gen, batch_size, num_ground_truth))
    else:
      # Feed in ground_truth
      output_items = groundtruth_items

    return output_items

# TODO(mbz): use tf.distributions.kl_divergence instead.
Example #22
Source File: op.py From ArtGAN with BSD 3-Clause "New" or "Revised" License | 5 votes |
def log_sum_exp(x, axis=1):
    m = tf.reduce_max(x, axis=axis, keep_dims=True)
    return m + tf.log(tf.reduce_sum(tf.exp(x - m), axis=axis))
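A hedged sanity check (assumed values): the helper above subtracts the row-wise maximum before exponentiating, which avoids overflow but should agree with the naive formulation for moderate inputs.

x = tf.constant([[1.0, 2.0, 3.0]])
stable = log_sum_exp(x, axis=1)                                   # shape (1, 1)
naive = tf.log(tf.reduce_sum(tf.exp(x), axis=1, keep_dims=True))  # approx. [[3.4076]]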
Example #23
Source File: train_ac_exploration_f18.py From cs294-112_hws with MIT License | 5 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

            mu + sigma * z,         z ~ N(0, I)

        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, num_samples=1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
    return sy_sampled_ac
Example #24
Source File: density_model.py From cs294-112_hws with MIT License | 5 votes |
def make_prior(self, z_size):
    """
    ### PROBLEM 3
    ### YOUR CODE HERE

    args:
        z_size: output dimension of the encoder network

    TODO:
        prior_mean and prior_logstd are for a standard normal distribution
            both have dimension z_size
    """
    prior_mean = tf.zeros(z_size)
    prior_logstd = tf.zeros(z_size)
    return tfp.distributions.MultivariateNormalDiag(loc=prior_mean,
                                                    scale_diag=tf.exp(prior_logstd))
Example #25
Source File: train_policy.py From cs294-112_hws with MIT License | 5 votes |
def ppo_loss(self, log_probs, fixed_log_probs, advantages, clip_epsilon=0.1, entropy_coeff=1e-4):
    """
    given:
        clip_epsilon

    arguments:
        advantages (mini_bsize,)
        states (mini_bsize,)
        actions (mini_bsize,)
        fixed_log_probs (mini_bsize,)

    intermediate results:
        states, actions --> log_probs
        log_probs, fixed_log_probs --> ratio
        advantages, ratio --> surr1
        ratio, clip_epsilon, advantages --> surr2
        surr1, surr2 --> policy_surr_loss
    """
    ratio = tf.exp(log_probs - fixed_log_probs)
    surr1 = ratio * advantages
    surr2 = tf.clip_by_value(ratio, clip_value_min=1.0-clip_epsilon, clip_value_max=1.0+clip_epsilon) * advantages
    policy_surr_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))

    probs = tf.exp(log_probs)
    entropy = tf.reduce_sum(-(log_probs * probs))
    policy_surr_loss -= entropy_coeff * entropy

    return policy_surr_loss
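A small numeric sketch (all values assumed) of the clipped surrogate used above: the probability ratio comes from exponentiating the log-probability difference, and the pessimistic minimum of the clipped and unclipped terms is taken.

log_probs = tf.constant([0.1, -0.2])
fixed_log_probs = tf.constant([0.0, 0.0])
advantages = tf.constant([1.0, -1.0])

ratio = tf.exp(log_probs - fixed_log_probs)             # approx. [1.105, 0.819]
surr1 = ratio * advantages
surr2 = tf.clip_by_value(ratio, 0.9, 1.1) * advantages  # ratio clipped to [1-eps, 1+eps]
policy_surr_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))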
Example #26
Source File: train_pg_f18.py From cs294-112_hws with MIT License | 5 votes |
def get_neg_log_prob(self, policy_parameters, sy_ac_na):
    """
    Constructs a symbolic operation for computing the negative log probability
    of a set of actions that were actually taken according to the policy

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

        sy_ac_na:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    returns:
        sy_neg_logprob_n: (batch_size)

    Hint:
        For the discrete case, use the log probability under a categorical distribution.
        For the continuous case, use the log probability under a multivariate gaussian.
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_CODE_HERE
        sy_neg_logprob_n = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=sy_ac_na,
            logits=sy_logits_na
        )
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_CODE_HERE
        sy = (sy_ac_na - sy_mean) / tf.exp(sy_logstd)
        sy_neg_logprob_n = 0.5 * tf.reduce_sum(sy * sy, axis=1)
    return sy_neg_logprob_n
Example #27
Source File: train_pg_f18.py From cs294-112_hws with MIT License | 5 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

            mu + sigma * z,         z ~ N(0, I)

        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_CODE_HERE
        sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_CODE_HERE
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
    return sy_sampled_ac

#========================================================================================#
#                           ----------PROBLEM 2----------
#========================================================================================#
Example #28
Source File: train_ac_f18.py From cs294-112_hws with MIT License | 5 votes |
def get_log_prob(self, policy_parameters, sy_ac_na):
    """
    Constructs a symbolic operation for computing the log probability of a set of
    actions that were actually taken according to the policy

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

        sy_ac_na: (batch_size, self.ac_dim)

    returns:
        sy_logprob_n: (batch_size)

    Hint:
        For the discrete case, use the log probability under a categorical distribution.
        For the continuous case, use the log probability under a multivariate gaussian.
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy_logprob_n = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=sy_ac_na,
            logits=sy_logits_na
        )
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy = (sy_ac_na - sy_mean) / tf.exp(sy_logstd)
        sy_logprob_n = -0.5 * tf.reduce_sum(sy * sy, axis=1)
    return sy_logprob_n
Example #29
Source File: train_ac_f18.py From cs294-112_hws with MIT License | 5 votes |
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from
    the policy distribution

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

    returns:
        sy_sampled_ac:
            if discrete: (batch_size)
            if continuous: (batch_size, self.ac_dim)

    Hint: for the continuous case, use the reparameterization trick:
        The output from a Gaussian distribution with mean 'mu' and std 'sigma' is

            mu + sigma * z,         z ~ N(0, I)

        This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1)
    else:
        sy_mean, sy_logstd = policy_parameters
        # YOUR_HW2 CODE_HERE
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
    return sy_sampled_ac
Example #30
Source File: ops.py From SSGAN-Tensorflow with MIT License | 5 votes |
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x > 0.0, x, alpha * tf.exp(x) - alpha)