Python tensorflow.compat.v1.reduce_sum() Examples

The following are 30 code examples of tensorflow.compat.v1.reduce_sum(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1.
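Before the examples, a minimal sketch of the function itself may be useful: reduce_sum collapses a tensor along the given axes (all axes when none are specified), and keepdims=True retains the reduced axes with size 1. The tensor values below are illustrative only.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # the examples below assume graph mode

x = tf.constant([[1., 2.], [3., 4.]])
total = tf.reduce_sum(x)                                # scalar: 10.0
row_sums = tf.reduce_sum(x, axis=-1)                    # shape [2]: [3., 7.]
row_sums_kd = tf.reduce_sum(x, axis=-1, keepdims=True)  # shape [2, 1]
with tf.Session() as sess:
  print(sess.run([total, row_sums, row_sums_kd]))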
Example #1
Source File: lstm.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    # TODO(lukaszkaiser): investigate this issue and repair.
    if self._hparams.initializer == "orthogonal":
      raise ValueError("LSTM models fail with orthogonal initializer.")
    train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    input_shape = common_layers.shape_list(features["inputs_raw"])
    flat_input = tf.reshape(features["inputs_raw"],
                            [input_shape[0], input_shape[1]])
    inputs_length = tf.reduce_sum(tf.minimum(flat_input, 1), -1)
    target_shape = common_layers.shape_list(features["targets_raw"])
    flat_target = tf.reshape(features["targets_raw"],
                             [target_shape[0], target_shape[1]])
    targets_length = tf.reduce_sum(tf.minimum(flat_target, 1), -1)
    tf.logging.info(self._hparams)
    return lstm_seq2seq_internal_attention(
        features["inputs"], features["targets"], self._hparams, train,
        inputs_length, targets_length) 
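A note on the length computation above: it assumes padding positions hold token id 0 and real tokens have positive ids, so tf.minimum(ids, 1) is a 0/1 non-padding indicator and its sum over the time axis is the sequence length. A standalone sketch of the same trick (toy ids, assumed pad id 0):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

ids = tf.constant([[7, 2, 9, 0, 0],
                   [4, 0, 0, 0, 0]])             # two rows padded to length 5
lengths = tf.reduce_sum(tf.minimum(ids, 1), -1)  # non-padding count per row
with tf.Session() as sess:
  print(sess.run(lengths))  # [3 1]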
Example #2
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
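The dist computation relies on the identity ||x - m||^2 = ||x||^2 + ||m||^2 - 2<x, m>, which turns all pairwise squared distances into a single matmul plus two reduce_sum calls. A NumPy check of that identity (toy shapes, not the tensor2tensor API):

import numpy as np

x = np.random.randn(4, 8)        # 4 vectors of depth 8
means = np.random.randn(16, 8)   # 16 codebook entries

x_norm_sq = np.sum(x ** 2, axis=-1, keepdims=True)          # [4, 1]
means_norm_sq = np.sum(means ** 2, axis=-1, keepdims=True)  # [16, 1]
dist = x_norm_sq + means_norm_sq.T - 2 * x @ means.T        # [4, 16]

direct = ((x[:, None, :] - means[None, :, :]) ** 2).sum(-1)  # pair by pair
assert np.allclose(dist, direct)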
Example #3
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def summarize_features(features, num_shards=1):
  """Generate summaries for features."""
  if not common_layers.should_generate_summaries():
    return

  with tf.name_scope("input_stats"):
    for (k, v) in sorted(six.iteritems(features)):
      if (isinstance(v, tf.Tensor) and (v.get_shape().ndims > 1) and
          (v.dtype != tf.string)):
        tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards)
        tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
        nonpadding = tf.to_float(tf.not_equal(v, 0))
        nonpadding_tokens = tf.reduce_sum(nonpadding)
        tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
        tf.summary.scalar("%s_nonpadding_fraction" % k,
                          tf.reduce_mean(nonpadding)) 
Example #4
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, num_experts, gates):
    """Create a SparseDispatcher.

    Args:
      num_experts: an integer.
      gates: a `Tensor` of shape `[batch_size, num_experts]`.

    Returns:
      a SparseDispatcher
    """
    self._gates = gates
    self._num_experts = num_experts

    where = tf.to_int32(tf.where(tf.transpose(gates) > 0))
    self._expert_index, self._batch_index = tf.unstack(where, num=2, axis=1)
    self._part_sizes_tensor = tf.reduce_sum(tf.to_int32(gates > 0), [0])
    self._nonzero_gates = tf.gather(
        tf.reshape(self._gates, [-1]),
        self._batch_index * num_experts + self._expert_index) 
Example #5
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def cv_squared(x):
  """The squared coefficient of variation of a sample.

  Useful as a loss to encourage a positive distribution to be more uniform.
  Epsilons added for numerical stability.
  Returns 0 for an empty Tensor.

  Args:
    x: a `Tensor`.

  Returns:
    a `Scalar`.
  """
  epsilon = 1e-10
  float_size = tf.to_float(tf.size(x)) + epsilon
  mean = tf.reduce_sum(x) / float_size
  variance = tf.reduce_sum(tf.squared_difference(x, mean)) / float_size
  return variance / (tf.square(mean) + epsilon) 
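Since cv_squared is variance over squared mean, it is 0 for a perfectly uniform positive vector and grows as the load becomes more unbalanced, which is what makes it useful as a load-balancing loss for mixtures of experts. A quick numeric check (epsilons dropped for clarity):

import numpy as np

def cv_squared_np(x):
  return x.var() / x.mean() ** 2

print(cv_squared_np(np.array([1., 1., 1., 1.])))  # 0.0: perfectly uniform
print(cv_squared_np(np.array([4., 0., 0., 0.])))  # 3.0: all load on one expert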
Example #6
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def one_hot_class_label_loss(top_out,
                             targets,
                             model_hparams,
                             vocab_size,
                             weights_fn):
  """Apply softmax cross-entropy between outputs and targets.

  Args:
    top_out: logits Tensor with shape [batch, ?, ?, num_classes]
    targets: one-hot encoding Tensor with shape [batch, ?, ?, num_classes]
    model_hparams: HParams, model hyperparameters.
    vocab_size: int, vocabulary size.
    weights_fn:

  Returns:
    loss_scale (cross-entropy), loss_denom
  """
  del model_hparams, vocab_size  # unused arg
  loss_scale = tf.losses.softmax_cross_entropy(
      onehot_labels=targets, logits=top_out)
  weights = weights_fn(targets)
  loss_denom = tf.reduce_sum(weights)
  return loss_scale, loss_denom 
Example #7
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def kl_divergence(mu, log_var, mu_p=0.0, log_var_p=0.0):
  """KL divergence of diagonal gaussian N(mu,exp(log_var)) and N(0,1).

  Args:
    mu: mu parameter of the distribution.
    log_var: log(var) parameter of the distribution.
    mu_p: optional mu from a learned prior distribution
    log_var_p: optional log(var) from a learned prior distribution
  Returns:
    the KL loss.
  """

  batch_size = shape_list(mu)[0]
  prior_distribution = tfp.distributions.Normal(
      mu_p, tf.exp(tf.multiply(0.5, log_var_p)))
  posterior_distribution = tfp.distributions.Normal(
      mu, tf.exp(tf.multiply(0.5, log_var)))
  kld = tfp.distributions.kl_divergence(posterior_distribution,
                                        prior_distribution)
  return tf.reduce_sum(kld) / to_float(batch_size) 
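For the default prior (mu_p=0, log_var_p=0) the same quantity has the familiar closed form 0.5 * (exp(log_var) + mu^2 - 1 - log_var), summed over dimensions and, as above, divided by the batch size. A NumPy sketch of that formula:

import numpy as np

def kl_to_standard_normal(mu, log_var):
  # KL(N(mu, exp(log_var)) || N(0, 1)), summed over all dimensions.
  return 0.5 * np.sum(np.exp(log_var) + mu ** 2 - 1.0 - log_var)

print(kl_to_standard_normal(np.array([0.5, -0.3]), np.array([0.1, -0.2])))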
Example #8
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def multi_label_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  """Average loss over the labels."""
  del vocab_size  # unused arg
  logits = top_out
  num_labels = tf.shape(targets)[1]
  logits = tf.tile(logits, [1, num_labels, 1, 1, 1])

  xent, weights = common_layers.padded_cross_entropy(
      logits,
      targets,
      model_hparams.label_smoothing,
      weights_fn=weights_fn,
      reduce_sum=False,
  )
  xent = tf.squeeze(xent, [2, 3])
  weights = tf.squeeze(weights, [2, 3])
  # average loss over all labels
  loss = tf.reduce_sum(xent, axis=1)
  weights = tf.reduce_sum(weights, axis=1)
  loss /= (weights + 1e-8)
  weights = tf.to_float(tf.greater(weights, 0.))

  return tf.reduce_sum(loss*weights), tf.reduce_sum(weights) 
Example #9
Source File: transformer_glow_layers.py    From tensor2tensor with Apache License 2.0
def actnorm(name, x, x_mask, inverse, init, logscale_factor=3.0):
  """Activation normalization, returns logabsdet of shape [B]."""
  eps = tf.keras.backend.epsilon()
  n_channels = common_layers.shape_list(x)[2]

  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    x_mean, x_var = gops.moments_over_bl(x, x_mask)
    b = gops.get_variable_ddi(
        "b", (n_channels), -x_mean, init, tf.zeros_initializer)
    log_w_init = -0.5 * tf.log(x_var + eps) / logscale_factor
    log_w = gops.get_variable_ddi(
        "log_w", (n_channels), log_w_init, init,
        tf.zeros_initializer) * logscale_factor

    if not inverse:
      x = (x + b) * tf.exp(log_w)
    else:
      x = x * tf.exp(-log_w) - b

    x_length = tf.reduce_sum(x_mask, -1)
    logabsdet = x_length * tf.reduce_sum(log_w)
    if inverse:
      logabsdet *= -1
    return x, logabsdet 
Example #10
Source File: common_layers_test.py    From tensor2tensor with Apache License 2.0
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    channels = 3
    pred = tf.random_normal([batch, height, width, num_mixtures * 10])
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=0, maxval=256, dtype=tf.int32)
    actual_loss_num, actual_loss_den = common_layers.dml_loss(
        pred=pred, labels=labels, reduce_sum=reduce_sum)
    actual_loss = actual_loss_num / actual_loss_den

    real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
    expected_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=real_labels) / channels
    if reduce_sum:
      expected_loss = tf.reduce_mean(expected_loss)

    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val) 
Example #11
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def padded_accuracy_topk(predictions,
                         labels,
                         k,
                         weights_fn=common_layers.weights_nonzero):
  """Percentage of times that top-k predictions matches labels on non-0s."""
  with tf.variable_scope("padded_accuracy_topk", values=[predictions, labels]):
    padded_predictions, padded_labels = common_layers.pad_with_zeros(
        predictions, labels)
    weights = weights_fn(padded_labels)
    effective_k = tf.minimum(k,
                             common_layers.shape_list(padded_predictions)[-1])
    _, outputs = tf.nn.top_k(padded_predictions, k=effective_k)
    outputs = tf.to_int32(outputs)
    padded_labels = tf.to_int32(padded_labels)
    padded_labels = tf.expand_dims(padded_labels, axis=-1)
    padded_labels += tf.zeros_like(outputs)  # Pad to same shape.
    same = tf.to_float(tf.equal(outputs, padded_labels))
    same_topk = tf.reduce_sum(same, axis=-1)
    return same_topk, weights 
Example #12
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_sum_over_lc(x, x_mask):
  """Returns sum of x (over L and C) given the actual length and pad.

  Args:
    x: input. (B,L,C)
    x_mask: binary padding mask. (B,L)

  Returns:
    sum of x. (B)
  """

  if x.shape.rank == 3 and x_mask.shape.rank == 2:
    x_mask = x_mask[..., tf.newaxis]
  else:
    tf.logging.info("x: {}, x_mask: {}".format(x.shape.rank, x_mask.shape.rank))
    raise ValueError("Dimension not supported.")

  masked_x = x * x_mask
  return tf.reduce_sum(masked_x, axis=[1, 2])  # sum over L, C
Example #13
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_mean_over_bl(x, x_mask):
  """Returns average of x (over B and L) given the actual length and pad.

  Args:
    x: input. (B,L,C)
    x_mask: binary padding mask. (B,L)

  Returns:
    mean of x. (C)
  """

  if x.shape.rank == 3 and x_mask.shape.rank == 2:
    x_mask = x_mask[..., tf.newaxis]
  else:
    tf.logging.info("x: {}, x_mask: {}".format(x.shape.rank, x_mask.shape.rank))
    raise ValueError("Dimension not supported.")

  masked_x = x * x_mask
  total = tf.reduce_sum(masked_x, axis=[0, 1])  # sum over B, L
  return total / tf.reduce_sum(x_mask)
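The masked reductions in these helpers all follow one pattern: zero out padded positions with the mask, sum, and (for means) divide by the mask sum rather than by the padded size. A compact sketch of the mean over B and L (toy shapes B=2, L=3, C=1):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([[[1.], [2.], [3.]],
                 [[4.], [5.], [6.]]])   # (B, L, C)
x_mask = tf.constant([[1., 1., 0.],
                      [1., 0., 0.]])    # second row has true length 1
masked_x = x * x_mask[..., tf.newaxis]
mean_bl = tf.reduce_sum(masked_x, axis=[0, 1]) / tf.reduce_sum(x_mask)
with tf.Session() as sess:
  print(sess.run(mean_bl))  # [(1 + 2 + 4) / 3] = [2.333...]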
Example #14
Source File: ppo.py    From tensor2tensor with Apache License 2.0
def _distributional_to_value(value_d, size, subscale, threshold):
  """Get a scalar value out of a value distribution in distributional RL."""
  half = size // 2
  value_range = (tf.to_float(tf.range(-half, half)) + 0.5) * subscale
  probs = tf.nn.softmax(value_d)

  if threshold == 0.0:
    return tf.reduce_sum(probs * value_range, axis=-1)

  # accumulated_probs[..., i] is the sum of probabilities in buckets up to i,
  # so it is the probability that value <= the i-th bucket value.
  accumulated_probs = tf.cumsum(probs, axis=-1)
  # New probs are 0 on all lower buckets, until the threshold
  probs = tf.where(accumulated_probs < threshold, tf.zeros_like(probs), probs)
  probs /= tf.reduce_sum(probs, axis=-1, keepdims=True)  # Re-normalize.
  return tf.reduce_sum(probs * value_range, axis=-1) 
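With threshold == 0.0 the function returns the plain expectation of the bucketed distribution; with a positive threshold, the lower tail up to that cumulative probability is dropped and the remainder re-normalized, yielding a pessimistic value estimate. The same arithmetic in NumPy (toy distribution, assumed size=4, subscale=1.0, threshold=0.25):

import numpy as np

size, subscale, threshold = 4, 1.0, 0.25
half = size // 2
value_range = (np.arange(-half, half) + 0.5) * subscale  # [-1.5 -0.5 0.5 1.5]

probs = np.array([0.1, 0.2, 0.3, 0.4])  # already normalized for the demo
print(np.sum(probs * value_range))      # plain expectation: 0.5

accumulated = np.cumsum(probs)
probs = np.where(accumulated < threshold, 0.0, probs)
probs /= probs.sum()                    # re-normalize after dropping the tail
print(np.sum(probs * value_range))      # pessimistic value: ~0.722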
Example #15
Source File: base.py    From tensor2tensor with Apache License 2.0
def pixels_from_softmax(frame_logits, pure_sampling=False,
                        temperature=1.0, gumbel_noise_factor=0.2):
  """Given frame_logits from a per-pixel softmax, generate colors."""
  # If we're purely sampling, just sample each pixel.
  if pure_sampling or temperature == 0.0:
    return common_layers.sample_with_temperature(frame_logits, temperature)

  # Gumbel-sample from the pixel softmax and average over pixel values.
  pixel_range = tf.to_float(tf.range(256))
  for _ in range(len(frame_logits.get_shape().as_list()) - 1):
    pixel_range = tf.expand_dims(pixel_range, axis=0)

  frame_logits = tf.nn.log_softmax(frame_logits)
  gumbel_samples = discretization.gumbel_sample(
      common_layers.shape_list(frame_logits)) * gumbel_noise_factor

  frame = tf.nn.softmax((frame_logits + gumbel_samples) / temperature, axis=-1)
  result = tf.reduce_sum(frame * pixel_range, axis=-1)
  # Round on the forward pass, not on the backward one.
  return result + tf.stop_gradient(tf.round(result) - result) 
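The last line is a straight-through rounding estimator: the forward value equals tf.round(result), but because the rounding correction sits inside tf.stop_gradient, the backward pass sees only the identity on result. A minimal sketch of the pattern:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([0.2, 1.7, 3.4])
rounded_st = x + tf.stop_gradient(tf.round(x) - x)
grad = tf.gradients(tf.reduce_sum(rounded_st), x)[0]
with tf.Session() as sess:
  print(sess.run(rounded_st))  # [0. 2. 3.]: rounded forward values
  print(sess.run(grad))        # [1. 1. 1.]: identity gradient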
Example #16
Source File: dyneval.py    From lamb with Apache License 2.0
def _make_update(self):
    mss = []
    gsum = 0.0
    count = 0
    for sum_squared_grads in self._sum_squared_grads:
      ms = tf.sqrt(sum_squared_grads / self._num_squared_grads)
      gsum += tf.reduce_sum(ms)
      count += tf.reduce_sum(tf.ones_like(ms))
      mss.append(ms)
    gsum = gsum / count

    assignments = []
    for grad, var, save, sum_squared_grads, ms in zip(
        self._grads, self._vars, self._saves, self._sum_squared_grads, mss):
      decay_rate = tf.minimum(1.0, self._decay_rate*(ms/gsum))
      delta = (-self._learning_rate*grad / (ms + self._epsilon) +
               decay_rate*(save-var))
      assignments.append(var.assign_add(delta))
    return tf.group(assignments) 
Example #17
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_sum_over_l(x, x_mask):
  """Returns sum of x (over L) given the actual length and pad.

  Args:
    x: input. (B,L,C)
    x_mask: binary padding mask. (B,L)

  Returns:
    sum of x. (B,C)
  """

  if x.shape.rank == 3 and x_mask.shape.rank == 2:
    x_mask = x_mask[..., tf.newaxis]
  else:
    tf.logging.info("x: {}, x_mask: {}".format(x.shape.rank, x_mask.shape.rank))
    raise ValueError("Dimension not supported.")

  masked_x = x * x_mask
  return tf.reduce_sum(masked_x, axis=-2)  # sum over L
Example #18
Source File: dropout.py    From lamb with Apache License 2.0
def _build(self, x, state):
    prev_keep_mask = state
    shape = tf.shape(x)
    noise = tf.random_uniform(shape, dtype=x.dtype)
    other_mask = tf.floor(self._keep_prob + noise)
    choice_noise = tf.random_uniform(shape, dtype=x.dtype)
    choice = tf.less(choice_noise, self._flip_prob)
    # KLUDGE(melisgl): The client has to pass the last keep_mask from
    # a batch to the next so the mask may end up next to some
    # recurrent cell state. This state is often zero at the beginning
    # and may be periodically zeroed (per example) during training.
    # While zeroing LSTM state is okay, zeroing the dropout mask is
    # not. So instead of forcing every client to deal with this common
    # (?) case, if an all zero mask is detected, then regenerate a
    # fresh mask. This is of course a major hack and won't help with
    # learnt initial states, for example.
    sum_ = tf.reduce_sum(prev_keep_mask, 1, keepdims=True)
    is_initializing = tf.equal(sum_, 0.0)

    self._keep_mask = tf.where(tf.logical_or(choice, is_initializing),
                               other_mask,
                               prev_keep_mask)
    self._time_step += 1
    return x * self._keep_mask / self._keep_prob * self._scaler 
Example #19
Source File: message_passing_attention.py    From tensor2tensor with Apache License 2.0
def compute_values(edge_compatibility, v):
  """Compute values. If edge compatibilities is just adjacency, we get ggnn.

  Args:
    edge_compatibility: A tensor of shape [batch, num_transforms, length, depth]
    v: A tensor of shape [batch, num_transforms, length, depth]

  Returns:
    output: A [batch, length, depth] tensor
  """

  # Computes the incoming value vectors for each node by weighting them
  # according to the attention weights. These values are still segregated by
  # edge type.
  # Shape = [B, T, N, V].
  all_edge_values = tf.matmul(tf.to_float(edge_compatibility), v)

  # Combines the weighted value vectors together across edge types into a
  # single N x V matrix for each batch.
  output = tf.reduce_sum(all_edge_values, axis=1)  # Shape [B, N, V].
  return output 
Example #20
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def top_k_softmax(x, k):
  """Calculate softmax(x), select top-k and rescale to sum to 1.

  Args:
    x: Input to softmax over.
    k: Number of top-k to select.

  Returns:
    softmax(x) and maximum item.
  """
  x = tf.nn.softmax(x)
  top_x, _ = tf.nn.top_k(x, k=k + 1)
  min_top = tf.reduce_min(top_x, axis=-1, keepdims=True)
  x = tf.nn.relu((x - min_top) + 1e-12)
  x /= tf.reduce_sum(x, axis=-1, keepdims=True)
  return x, tf.reduce_max(top_x, axis=-1) 
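Note that the function takes the top k+1 values, so min_top is the (k+1)-th largest probability; subtracting it and applying relu suppresses everything outside the top k before re-normalizing. A usage sketch, assuming top_k_softmax as defined above is in scope:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

logits = tf.constant([[2.0, 1.0, 0.1, -1.0]])
topk_probs, max_prob = top_k_softmax(logits, k=2)
with tf.Session() as sess:
  probs, _ = sess.run([topk_probs, max_prob])
  print(probs)        # two dominant entries, the rest ~1e-12
  print(probs.sum())  # ~1.0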
Example #21
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def real_log_poisson_loss(top_out,
                          targets,
                          model_hparams,
                          vocab_size,
                          weights_fn):
  """Poisson loss for real."""
  del model_hparams, vocab_size  # unused arg
  predictions = top_out
  if (len(common_layers.shape_list(top_out)) != len(
      common_layers.shape_list(targets))):
    predictions = tf.squeeze(top_out, axis=[-1])
  with tf.name_scope("log_poisson"):
    weights = weights_fn(targets)
    lp_loss = tf.nn.log_poisson_loss(targets, predictions)
    return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights) 
Example #22
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_mean_over_l(x, x_mask):
  """Returns mean of x (over L) given the actual length and pad."""
  return reduce_sum_over_l(x, x_mask) / tf.reduce_sum(x_mask, 1, keepdims=True) 
Example #23
Source File: common_layers_test.py    From tensor2tensor with Apache License 2.0
def testDiscretizedMixLogisticLoss(self):
    batch = 2
    height = 4
    width = 4
    channels = 3
    num_mixtures = 5
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.random_uniform([batch, height, width, num_mixtures * 3],
                                   minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=-.9, maxval=.9)
    locs_0 = locs[..., :3]
    log_scales_0 = log_scales[..., :3]
    centered_labels = labels - locs_0
    inv_stdv = tf.exp(-log_scales_0)
    plus_in = inv_stdv * (centered_labels + 1. / 255.)
    min_in = inv_stdv * (centered_labels - 1. / 255.)
    cdf_plus = tf.nn.sigmoid(plus_in)
    cdf_min = tf.nn.sigmoid(min_in)
    expected_loss = -tf.reduce_sum(tf.log(cdf_plus - cdf_min), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5) 
Example #24
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def video_l1_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  """Compute loss numerator and denominator for one shard of output."""
  del vocab_size  # unused arg
  logits = top_out
  logits = tf.reshape(logits, [-1] + common_layers.shape_list(logits)[2:-1])
  targets = tf.reshape(targets, [-1] + common_layers.shape_list(targets)[2:])
  weights = weights_fn(targets)
  # Shift targets by 0.5 so later just casting to int gives the prediction.
  # So for int targets, say 0 and 7, we actually train to predict 0.5 and 7.5.
  # Later (in metrics or infer) this is cast to int anyway. Also, we have no
  # loss beyond cutoff = 0.2 as these are already correct predictions.
  targets = tf.to_float(targets) + 0.5
  loss = video_l1_internal_loss(logits, targets, model_hparams)
  return tf.reduce_sum(loss * weights), tf.reduce_sum(weights) 
Example #25
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def dml_loss(pred, labels, weights_fn=_weights_one_third, reduce_sum=True):
  """Discretized mixture of logistics loss.

  Args:
    pred: A [batch, height, width, num_mixtures*10] tensor of floats
      comprising one unconstrained mixture probability, three means
      (one per channel), three standard deviations (one per channel),
      and three coefficients which linearly parameterize dependence across
      channels.
    labels: A [batch, height, width, channels] tensor of 8-bit pixel
      intensities. The computation assumes channels is 3.
    weights_fn: A function of labels, returning a Tensor of shape
      [batch, height, width] which weights each loss term. Default is to scale
      each loss term by 1/3 so that they capture the average across channels.
    reduce_sum: A boolean, to return scalar loss instead of per position.

  Returns:
    Tuple of loss tensors for numerator and denominator, each a scalar if
    reduce_sum else of shape [batch, height, width]. Dividing the numerator by
    the denominator gives the number of nats per pixel in labels.
  """
  real_labels = convert_rgb_to_symmetric_real(labels)
  dml_loss_value = discretized_mix_logistic_loss(pred=pred, labels=real_labels)
  weights = weights_fn(labels)
  loss_num = weights * dml_loss_value
  loss_den = weights_nonzero(weights)
  if reduce_sum:
    loss_num = tf.reduce_sum(loss_num)
    loss_den = tf.reduce_sum(loss_den)
  return loss_num, loss_den 
Example #26
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def sigmoid_class_label_loss(top_out,
                             targets,
                             model_hparams,
                             vocab_size,
                             weights_fn):
  """Loss for class label."""
  # Expect inputs of size [batch-size, timesteps, 1, num-classes], where the
  # last dimension of num-classes represents logits for binary labels.
  del model_hparams, vocab_size  # unused arg
  loss_scale = tf.losses.sigmoid_cross_entropy(
      multi_class_labels=targets, logits=top_out)
  weights = weights_fn(targets)
  loss_denom = tf.reduce_sum(weights)
  return loss_scale, loss_denom 
Example #27
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def real_l2_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  del model_hparams, vocab_size  # unused arg
  predictions = top_out
  if (len(common_layers.shape_list(top_out)) != len(
      common_layers.shape_list(targets))):
    predictions = tf.squeeze(top_out, axis=[-1])
  with tf.name_scope("l2"):
    weights = weights_fn(targets)
    l2 = tf.pow(predictions - targets, 2)
    return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) 
Example #28
Source File: base.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    self.has_actions = "input_action" in features
    self.has_rewards = "target_reward" in features
    self.has_policies = "target_policy" in features
    self.has_values = "target_value" in features
    hparams = self.hparams

    def merge(inputs, targets):
      """Split inputs and targets into lists."""
      inputs = tf.unstack(inputs, axis=1)
      targets = tf.unstack(targets, axis=1)
      assert len(inputs) == hparams.video_num_input_frames
      assert len(targets) == hparams.video_num_target_frames
      return inputs + targets

    frames = merge(features["inputs"], features["targets"])
    frames_raw = merge(features["inputs_raw"], features["targets_raw"])
    actions, rewards = None, None
    if self.has_actions:
      actions = merge(features["input_action"], features["target_action"])
    if self.has_rewards:
      rewards = merge(features["input_reward"], features["target_reward"])

    # Reset the internal states if reset_internal_states has been
    # passed as a feature and has a value greater than 0.
    if self.is_recurrent_model and self.internal_states is not None:
      def reset_func():
        reset_ops = flat_lists(self.reset_internal_states_ops())
        with tf.control_dependencies(reset_ops):
          return tf.no_op()
      if self.is_predicting and "reset_internal_states" in features:
        reset = features["reset_internal_states"]
        reset = tf.greater(tf.reduce_sum(reset), 0.5)
        reset_ops = tf.cond(reset, reset_func, tf.no_op)
      else:
        reset_ops = tf.no_op()
      with tf.control_dependencies([reset_ops]):
        frames[0] = tf.identity(frames[0])

    with tf.control_dependencies([frames[0]]):
      return self.__process(frames, actions, rewards, frames_raw) 
Example #29
Source File: epva.py    From tensor2tensor with Apache License 2.0
def mean_squared_error(true, pred):
  """L2 distance between tensors true and pred.

  Args:
    true: the ground truth image.
    pred: the predicted image.
  Returns:
    mean squared error between ground truth and predicted image.
  """
  result = tf.reduce_sum(
      tf.squared_difference(true, pred)) / tf.to_float(tf.size(pred))
  return result 
Example #30
Source File: epva.py    From tensor2tensor with Apache License 2.0
def l1_error(true, pred):
  """L1 distance between tensors true and pred."""
  return tf.reduce_sum(tf.abs(true - pred)) / tf.to_float(tf.size(pred))