Python tensorflow.python.ops.math_ops.lgamma() Examples

The following are 30 code examples of tensorflow.python.ops.math_ops.lgamma(). The source file, originating project, and license for each example are noted above its code. You may also want to check out the other available functions and classes of the tensorflow.python.ops.math_ops module.
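Before the examples, a quick orientation (not part of any project below): math_ops.lgamma computes the natural log of the absolute value of the Gamma function elementwise; its public alias is tf.lgamma (tf.math.lgamma in later releases). Its most common use is evaluating log-factorials and log-Gamma/log-Beta normalizers without overflow. A minimal sketch, assuming a TF 1.x graph-mode environment to match the era of the examples:

import math
import tensorflow as tf

# lgamma(n + 1) == log(n!), via the identity Gamma(n + 1) = n!.
n = tf.constant(5.0)
log_factorial = tf.lgamma(n + 1.0)

with tf.Session() as sess:
    print(sess.run(log_factorial))        # ~4.787492
    print(math.log(math.factorial(5)))    # reference: log(120) ~ 4.787492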
Example #1
Source File: relaxed_onehot_categorical.py    From lambda-packs with MIT License
def _log_prob(self, x):
    x = self._assert_valid_sample(x)
    # broadcast logits or x if need be.
    logits = self.logits
    if (not x.get_shape().is_fully_defined() or
        not logits.get_shape().is_fully_defined() or
        x.get_shape() != logits.get_shape()):
      logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
      x = array_ops.ones_like(logits, dtype=x.dtype) * x
    logits_shape = array_ops.shape(math_ops.reduce_sum(logits, axis=[-1]))
    logits_2d = array_ops.reshape(logits, [-1, self.event_size])
    x_2d = array_ops.reshape(x, [-1, self.event_size])
    # compute the normalization constant
    k = math_ops.cast(self.event_size, x.dtype)
    log_norm_const = (math_ops.lgamma(k)
                      + (k - 1.)
                      * math_ops.log(self.temperature))
    # compute the unnormalized density
    log_softmax = nn_ops.log_softmax(logits_2d - x_2d * self._temperature_2d)
    log_unnorm_prob = math_ops.reduce_sum(log_softmax, [-1], keep_dims=False)
    # combine unnormalized density with normalization constant
    log_prob = log_norm_const + log_unnorm_prob
    # Reshapes log_prob to be consistent with shape of user-supplied logits
    ret = array_ops.reshape(log_prob, logits_shape)
    return ret 
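For reference, reading Example #1 back as a formula: with k = event_size, temperature λ, logits α, and the ExpRelaxedOneHotCategorical (Concrete) parameterization these lines appear to implement, the log-density is

\[
\log p(x) = \log\Gamma(k) + (k-1)\log\lambda
          + \sum_{i=1}^{k}\bigl(\alpha_i - \lambda x_i\bigr)
          - k\,\log\sum_{j=1}^{k} e^{\alpha_j - \lambda x_j},
\]

where the first two terms are log_norm_const and the remainder is the summed log_softmax.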
Example #2
Source File: math_grad.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a = op.inputs[0]
  x = op.inputs[1]
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  unused_ra, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  partial_x = math_ops.exp(-x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a))
  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx)) 
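The partial_x term is the closed-form derivative of the regularized lower incomplete gamma function with respect to x, evaluated in log space so that Γ(a) never overflows:

\[
\frac{\partial}{\partial x} P(a, x)
  = \frac{x^{a-1} e^{-x}}{\Gamma(a)}
  = \exp\!\bigl(-x + (a-1)\log x - \log\Gamma(a)\bigr).
\]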
Example #3
Source File: math_grad.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _BetaincGrad(op, grad):
  """Returns gradient of betainc(a, b, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a, b
  a, b, x = op.inputs

  # two cases: x is a scalar and a/b are same-shaped tensors, or vice
  # versa; so it's sufficient to check against shape(a).
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  _, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because terms
  # can grow large.
  log_beta = (gen_math_ops.lgamma(a) + gen_math_ops.lgamma(b)
              - gen_math_ops.lgamma(a + b))
  partial_x = math_ops.exp(
      (b - 1) * math_ops.log(1 - x) + (a - 1) * math_ops.log(x) - log_beta)

  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,  # da
          None,  # db
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx)) 
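Here log_beta is the log of the Beta function and partial_x is the derivative of the regularized incomplete beta function with respect to x:

\[
\log B(a,b) = \log\Gamma(a) + \log\Gamma(b) - \log\Gamma(a+b),
\qquad
\frac{\partial}{\partial x} I_x(a,b) = \frac{x^{a-1}(1-x)^{b-1}}{B(a,b)}.
\]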
Example #4
Source File: gamma.py    From auto-alt-text-lambda-api with MIT License
def _kl_gamma_gamma(g0, g1, name=None):
  """Calculate the batched KL divergence KL(g0 || g1) with g0 and g1 Gamma.

  Args:
    g0: instance of a Gamma distribution object.
    g1: instance of a Gamma distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  with ops.name_scope(name, "kl_gamma_gamma",
                      values=[g0.alpha, g0.beta, g1.alpha, g1.beta]):
    # Result from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # For derivation see:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    return ((g0.alpha - g1.alpha) * math_ops.digamma(g0.alpha)
            + math_ops.lgamma(g1.alpha)
            - math_ops.lgamma(g0.alpha)
            + g1.alpha * math_ops.log(g0.beta)
            - g1.alpha * math_ops.log(g1.beta)
            + g0.alpha * (g1.beta / g0.beta - 1.)) 
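Written out, with shape α = alpha, rate β = beta, and ψ the digamma function, the returned expression is the standard Gamma-Gamma KL divergence:

\[
\mathrm{KL}(g_0\,\|\,g_1)
 = (\alpha_0-\alpha_1)\,\psi(\alpha_0)
 + \log\Gamma(\alpha_1) - \log\Gamma(\alpha_0)
 + \alpha_1\log\frac{\beta_0}{\beta_1}
 + \alpha_0\!\left(\frac{\beta_1}{\beta_0}-1\right).
\]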
Example #5
Source File: beta.py    From auto-alt-text-lambda-api with MIT License
def _kl_beta_beta(d1, d2, name=None):
  """Calculate the batched KL divergence KL(d1 || d2) with d1 and d2 Beta.

  Args:
    d1: instance of a Beta distribution object.
    d2: instance of a Beta distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_beta_beta".

  Returns:
    Batchwise KL(d1 || d2)
  """
  inputs = [d1.a, d1.b, d1.a_b_sum, d2.a_b_sum]
  with ops.name_scope(name, "kl_beta_beta", inputs):
    # ln(B(a', b') / B(a, b))
    log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b)
                - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum)
                - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b))
    # (a - a')*psi(a) + (b - b')*psi(b) + (a' - a + b' - b)*psi(a + b)
    digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a)
              + (d1.b - d2.b)*math_ops.digamma(d1.b)
              + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum))
    return log_betas + digammas 
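In formula form, with d1 = Beta(a_1, b_1), d2 = Beta(a_2, b_2), and ψ the digamma function, this computes

\[
\mathrm{KL}(d_1\,\|\,d_2)
 = \log\frac{B(a_2,b_2)}{B(a_1,b_1)}
 + (a_1-a_2)\,\psi(a_1) + (b_1-b_2)\,\psi(b_1)
 + (a_2+b_2-a_1-b_1)\,\psi(a_1+b_1).
\]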
Example #6
Source File: gamma.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _kl_gamma_gamma(g0, g1, name=None):
  """Calculate the batched KL divergence KL(g0 || g1) with g0 and g1 Gamma.

  Args:
    g0: instance of a Gamma distribution object.
    g1: instance of a Gamma distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  with ops.name_scope(name, "kl_gamma_gamma", values=[
      g0.concentration, g0.rate, g1.concentration, g1.rate]):
    # Result from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # For derivation see:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    return (((g0.concentration - g1.concentration)
             * math_ops.digamma(g0.concentration))
            + math_ops.lgamma(g1.concentration)
            - math_ops.lgamma(g0.concentration)
            + g1.concentration * math_ops.log(g0.rate)
            - g1.concentration * math_ops.log(g1.rate)
            + g0.concentration * (g1.rate / g0.rate - 1.)) 
Example #7
Source File: gamma.py    From keras-lambda with MIT License
def _kl_gamma_gamma(g0, g1, name=None):
  """Calculate the batched KL divergence KL(g0 || g1) with g0 and g1 Gamma.

  Args:
    g0: instance of a Gamma distribution object.
    g1: instance of a Gamma distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  with ops.name_scope(name, "kl_gamma_gamma",
                      values=[g0.alpha, g0.beta, g1.alpha, g1.beta]):
    # Result from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # For derivation see:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    return ((g0.alpha - g1.alpha) * math_ops.digamma(g0.alpha)
            + math_ops.lgamma(g1.alpha)
            - math_ops.lgamma(g0.alpha)
            + g1.alpha * math_ops.log(g0.beta)
            - g1.alpha * math_ops.log(g1.beta)
            + g0.alpha * (g1.beta / g0.beta - 1.)) 
Example #8
Source File: math_grad.py    From lambda-packs with MIT License
def _BetaincGrad(op, grad):
  """Returns gradient of betainc(a, b, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a, b
  a, b, x = op.inputs

  # two cases: x is a scalar and a/b are same-shaped tensors, or vice
  # versa; so it's sufficient to check against shape(a).
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  _, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because terms
  # can grow large.
  log_beta = (gen_math_ops.lgamma(a) + gen_math_ops.lgamma(b)
              - gen_math_ops.lgamma(a + b))
  partial_x = math_ops.exp(
      (b - 1) * math_ops.log(1 - x) + (a - 1) * math_ops.log(x) - log_beta)

  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,  # da
          None,  # db
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx)) 
Example #9
Source File: beta.py    From keras-lambda with MIT License
def _kl_beta_beta(d1, d2, name=None):
  """Calculate the batched KL divergence KL(d1 || d2) with d1 and d2 Beta.

  Args:
    d1: instance of a Beta distribution object.
    d2: instance of a Beta distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_beta_beta".

  Returns:
    Batchwise KL(d1 || d2)
  """
  inputs = [d1.a, d1.b, d1.a_b_sum, d2.a_b_sum]
  with ops.name_scope(name, "kl_beta_beta", inputs):
    # ln(B(a', b') / B(a, b))
    log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b)
                - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum)
                - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b))
    # (a - a')*psi(a) + (b - b')*psi(b) + (a' - a + b' - b)*psi(a + b)
    digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a)
              + (d1.b - d2.b)*math_ops.digamma(d1.b)
              + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum))
    return log_betas + digammas 
Example #10
Source File: beta.py    From deep_image_model with Apache License 2.0
def _kl_beta_beta(d1, d2, name=None):
  """Calculate the batched KL divergence KL(d1 || d2) with d1 and d2 Beta.

  Args:
    d1: instance of a Beta distribution object.
    d2: instance of a Beta distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_beta_beta".

  Returns:
    Batchwise KL(d1 || d2)
  """
  inputs = [d1.a, d1.b, d1.a_b_sum, d2.a_b_sum]
  with ops.name_scope(name, "kl_beta_beta", inputs):
    # ln(B(a', b') / B(a, b))
    log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b)
                - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum)
                - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b))
    # (a - a')*psi(a) + (b - b')*psi(b) + (a' - a + b' - b)*psi(a + b)
    digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a)
              + (d1.b - d2.b)*math_ops.digamma(d1.b)
              + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum))
    return log_betas + digammas 
Example #11
Source File: gamma.py    From lambda-packs with MIT License
def _kl_gamma_gamma(g0, g1, name=None):
  """Calculate the batched KL divergence KL(g0 || g1) with g0 and g1 Gamma.

  Args:
    g0: instance of a Gamma distribution object.
    g1: instance of a Gamma distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  with ops.name_scope(name, "kl_gamma_gamma", values=[
      g0.concentration, g0.rate, g1.concentration, g1.rate]):
    # Result from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # For derivation see:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    return (((g0.concentration - g1.concentration)
             * math_ops.digamma(g0.concentration))
            + math_ops.lgamma(g1.concentration)
            - math_ops.lgamma(g0.concentration)
            + g1.concentration * math_ops.log(g0.rate)
            - g1.concentration * math_ops.log(g1.rate)
            + g0.concentration * (g1.rate / g0.rate - 1.)) 
Example #12
Source File: student_t.py    From deep_image_model with Apache License 2.0
def _prob(self, x):
    y = (x - self.mu) / self.sigma
    half_df = 0.5 * self.df
    return (math_ops.exp(math_ops.lgamma(0.5 + half_df) -
                         math_ops.lgamma(half_df)) /
            (math_ops.sqrt(self.df) * math.sqrt(math.pi) * self.sigma) *
            math_ops.pow(1. + math_ops.square(y) / self.df, -(0.5 + half_df))) 
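This is the location-scale Student t density with ν = df, μ = mu, σ = sigma, using lgamma to form the ratio of Gamma functions in log space before exponentiating:

\[
p(x) = \frac{\Gamma\!\left(\tfrac{\nu+1}{2}\right)}
            {\Gamma\!\left(\tfrac{\nu}{2}\right)\sqrt{\nu\pi}\,\sigma}
       \left(1 + \frac{y^{2}}{\nu}\right)^{-\tfrac{\nu+1}{2}},
\qquad y = \frac{x-\mu}{\sigma}.
\]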
Example #13
Source File: binomial.py    From deep_image_model with Apache License 2.0
def _log_prob(self, counts):
    counts = self._check_counts(counts)
    prob_prob = (counts * math_ops.log(self.p) +
                 (self.n - counts) * math_ops.log(1. - self.p))
    combinations = (math_ops.lgamma(self.n + 1) -
                    math_ops.lgamma(counts + 1) -
                    math_ops.lgamma(self.n - counts + 1))
    log_prob = prob_prob + combinations
    return log_prob 
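The combinations term is the log binomial coefficient computed through lgamma, which stays finite for large n where n! itself would overflow:

\[
\log\binom{n}{k} = \log\Gamma(n+1) - \log\Gamma(k+1) - \log\Gamma(n-k+1),
\qquad
\log p(k) = k\log p + (n-k)\log(1-p) + \log\binom{n}{k}.
\]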
Example #14
Source File: math_grad.py    From keras-lambda with MIT License
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to a and x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a = op.inputs[0]
  x = op.inputs[1]
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  unused_ra, rx = gen_array_ops._broadcast_gradient_args(sa, sx)

  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  partial_x = math_ops.exp(-x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a))
  return (None,
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx)) 
Example #15
Source File: gamma.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _log_normalization(self):
    return (math_ops.lgamma(self.concentration)
            - self.concentration * math_ops.log(self.rate)) 
Example #16
Source File: gamma.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _entropy(self):
    return (self.concentration
            - math_ops.log(self.rate)
            + math_ops.lgamma(self.concentration)
            + ((1. - self.concentration) *
               math_ops.digamma(self.concentration))) 
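Examples #15 and #16 are the Gamma(concentration α, rate β) log-normalizer and differential entropy, respectively (ψ is the digamma function):

\[
\log Z = \log\Gamma(\alpha) - \alpha\log\beta,
\qquad
H = \alpha - \log\beta + \log\Gamma(\alpha) + (1-\alpha)\,\psi(\alpha).
\]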
Example #17
Source File: poisson.py    From deep_image_model with Apache License 2.0
def _log_prob(self, x):
    x = self._assert_valid_sample(x, check_integer=True)
    return x * math_ops.log(self.lam) - self.lam - math_ops.lgamma(x + 1) 
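The Poisson log-pmf uses lgamma(x + 1) in place of log(x!), so large counts never overflow:

\[
\log p(x) = x\log\lambda - \lambda - \log\Gamma(x+1)
          = x\log\lambda - \lambda - \log(x!)\quad\text{for integer }x.
\]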
Example #18
Source File: util.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def log_combinations(n, counts, name="log_combinations"):
  """Multinomial coefficient.

  Given `n` and `counts`, where `counts` has last dimension `k`, we compute
  the multinomial coefficient as:

  ```n! / prod_i n_i!```

  where `i` runs over all `k` classes.

  Args:
    n: Floating-point `Tensor` broadcastable with `counts`. This represents `n`
      outcomes.
    counts: Floating-point `Tensor` broadcastable with `n`. This represents
      counts in `k` classes, where `k` is the last dimension of the tensor.
    name: A name for this operation (optional).

  Returns:
    `Tensor` representing the multinomial coefficient between `n` and `counts`.
  """
  # First a bit about the number of ways counts could have come in:
  # E.g. if counts = [1, 2], then this is 3 choose 2.
  # In general, this is (sum counts)! / prod(counts!)
  # The sum should be along the last dimension of counts. This is the
  # "distribution" dimension. Here n a priori represents the sum of counts.
  with ops.name_scope(name, values=[n, counts]):
    n = ops.convert_to_tensor(n, name="n")
    counts = ops.convert_to_tensor(counts, name="counts")
    total_permutations = math_ops.lgamma(n + 1)
    counts_factorial = math_ops.lgamma(counts + 1)
    redundant_permutations = math_ops.reduce_sum(counts_factorial, axis=[-1])
    return total_permutations - redundant_permutations 
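In formula form, with counts n_1, …, n_k summing to n, the returned value is the log multinomial coefficient; e.g. counts = [1, 2] gives log(3! / (1!·2!)) = log 3:

\[
\log\frac{n!}{\prod_i n_i!} = \log\Gamma(n+1) - \sum_i \log\Gamma(n_i+1).
\]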
Example #19
Source File: beta.py    From deep_image_model with Apache License 2.0
def _entropy(self):
    return (math_ops.lgamma(self.a) -
            (self.a - 1.) * math_ops.digamma(self.a) +
            math_ops.lgamma(self.b) -
            (self.b - 1.) * math_ops.digamma(self.b) -
            math_ops.lgamma(self.a_b_sum) +
            (self.a_b_sum - 2.) * math_ops.digamma(self.a_b_sum)) 
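This is the Beta(a, b) differential entropy, with ψ the digamma function and B the Beta function (the three lgamma terms assemble log B(a, b)):

\[
H = \log B(a,b) - (a-1)\,\psi(a) - (b-1)\,\psi(b) + (a+b-2)\,\psi(a+b).
\]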
Example #20
Source File: beta.py    From deep_image_model with Apache License 2.0
def _log_prob(self, x):
    x = self._assert_valid_sample(x)
    log_unnormalized_prob = ((self.a - 1.) * math_ops.log(x) +
                             (self.b - 1.) * math_ops.log(1. - x))
    log_normalization = (math_ops.lgamma(self.a) +
                         math_ops.lgamma(self.b) -
                         math_ops.lgamma(self.a_b_sum))
    return log_unnormalized_prob - log_normalization 
Example #21
Source File: student_t.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _log_normalization(self):
    return (math_ops.log(math_ops.abs(self.scale)) +
            0.5 * math_ops.log(self.df) +
            0.5 * np.log(np.pi) +
            math_ops.lgamma(0.5 * self.df) -
            math_ops.lgamma(0.5 * (self.df + 1.))) 
Example #22
Source File: gamma.py    From deep_image_model with Apache License 2.0
def _entropy(self):
    return (self.alpha -
            math_ops.log(self.beta) +
            math_ops.lgamma(self.alpha) +
            (1. - self.alpha) * math_ops.digamma(self.alpha)) 
Example #23
Source File: gamma.py    From keras-lambda with MIT License
def _log_prob(self, x):
    x = control_flow_ops.with_dependencies([check_ops.assert_positive(x)] if
                                           self.validate_args else [], x)
    contrib_tensor_util.assert_same_float_dtype(tensors=[x],
                                                dtype=self.dtype)
    return (self.alpha * math_ops.log(self.beta) +
            (self.alpha - 1.) * math_ops.log(x) -
            self.beta * x -
            math_ops.lgamma(self.alpha)) 
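This is the Gamma(shape α = alpha, rate β = beta) log-density, with lgamma supplying the normalizer:

\[
\log p(x) = \alpha\log\beta + (\alpha-1)\log x - \beta x - \log\Gamma(\alpha).
\]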
Example #24
Source File: gamma.py    From keras-lambda with MIT License
def _entropy(self):
    return (self.alpha -
            math_ops.log(self.beta) +
            math_ops.lgamma(self.alpha) +
            (1. - self.alpha) * math_ops.digamma(self.alpha)) 
Example #25
Source File: inverse_gamma.py    From keras-lambda with MIT License
def _log_prob(self, x):
    x = control_flow_ops.with_dependencies([check_ops.assert_positive(x)] if
                                           self.validate_args else [], x)
    return (self.alpha * math_ops.log(self.beta) -
            math_ops.lgamma(self.alpha) -
            (self.alpha + 1.) * math_ops.log(x) - self.beta / x) 
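The inverse-Gamma(α = alpha, β = beta) log-density; only the sign of the log x term and the β/x tail distinguish it from the Gamma case above:

\[
\log p(x) = \alpha\log\beta - \log\Gamma(\alpha) - (\alpha+1)\log x - \frac{\beta}{x}.
\]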
Example #26
Source File: inverse_gamma.py    From keras-lambda with MIT License
def _entropy(self):
    return (self.alpha +
            math_ops.log(self.beta) +
            math_ops.lgamma(self.alpha) -
            (1. + self.alpha) * math_ops.digamma(self.alpha)) 
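And the corresponding inverse-Gamma differential entropy:

\[
H = \alpha + \log\beta + \log\Gamma(\alpha) - (1+\alpha)\,\psi(\alpha).
\]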
Example #27
Source File: beta.py    From keras-lambda with MIT License
def _log_prob(self, x):
    x = self._assert_valid_sample(x)
    log_unnormalized_prob = ((self.a - 1.) * math_ops.log(x) +
                             (self.b - 1.) * math_ops.log(1. - x))
    log_normalization = (math_ops.lgamma(self.a) +
                         math_ops.lgamma(self.b) -
                         math_ops.lgamma(self.a_b_sum))
    return log_unnormalized_prob - log_normalization 
Example #28
Source File: beta.py    From keras-lambda with MIT License
def _entropy(self):
    return (math_ops.lgamma(self.a) -
            (self.a - 1.) * math_ops.digamma(self.a) +
            math_ops.lgamma(self.b) -
            (self.b - 1.) * math_ops.digamma(self.b) -
            math_ops.lgamma(self.a_b_sum) +
            (self.a_b_sum - 2.) * math_ops.digamma(self.a_b_sum)) 
Example #29
Source File: student_t.py    From keras-lambda with MIT License
def _log_normalization(self):
    return (math_ops.log(math_ops.abs(self.sigma)) +
            0.5 * math_ops.log(self.df) +
            0.5 * np.log(np.pi) +
            math_ops.lgamma(0.5 * self.df) -
            math_ops.lgamma(0.5 * (self.df + 1.))) 
Example #30
Source File: relaxed_onehot_categorical.py    From keras-lambda with MIT License
def _log_prob(self, x):
    x = ops.convert_to_tensor(x, name="x")
    x = self._assert_valid_sample(x)
    # broadcast logits or x if need be.
    logits = self.logits
    if (not x.get_shape().is_fully_defined() or
        not logits.get_shape().is_fully_defined() or
        x.get_shape() != logits.get_shape()):
      logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
      x = array_ops.ones_like(logits, dtype=x.dtype) * x

    logits_shape = array_ops.shape(logits)
    if logits.get_shape().ndims == 2:
      logits_2d = logits
      x_2d = x
    else:
      logits_2d = array_ops.reshape(logits, [-1, self.num_classes])
      x_2d = array_ops.reshape(x, [-1, self.num_classes])
    # compute the normalization constant
    log_norm_const = (math_ops.lgamma(self.num_classes)
                      + (self.num_classes - 1)
                      * math_ops.log(self.temperature))
    # compute the unnormalized density
    log_softmax = nn_ops.log_softmax(logits_2d - x_2d * self.temperature)
    log_unnorm_prob = math_ops.reduce_sum(log_softmax, [-1], keep_dims=False)
    # combine unnormalized density with normalization constant
    log_prob = log_norm_const + log_unnorm_prob
    ret = array_ops.reshape(log_prob, logits_shape)
    return ret