Python tensorflow.compat.v1.reduce_sum() Examples

The following are 30 code examples of tensorflow.compat.v1.reduce_sum(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1.
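Before the examples, a minimal sketch of the function itself may be useful: reduce_sum collapses a tensor along the given axes (all axes when none are specified), and keepdims=True retains the reduced axes with size 1. The tensor values below are illustrative only.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # the examples below assume graph mode

x = tf.constant([[1., 2.], [3., 4.]])
total = tf.reduce_sum(x)                                # scalar: 10.0
row_sums = tf.reduce_sum(x, axis=-1)                    # shape [2]: [3., 7.]
row_sums_kd = tf.reduce_sum(x, axis=-1, keepdims=True)  # shape [2, 1]
with tf.Session() as sess:
  print(sess.run([total, row_sums, row_sums_kd]))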
Example #1
Source File: lstm.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    # TODO(lukaszkaiser): investigate this issue and repair.
    if self._hparams.initializer == "orthogonal":
      raise ValueError("LSTM models fail with orthogonal initializer.")
    train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN
    # This is a temporary fix for varying-length sequences within a batch.
    # A more complete fix should pass a length tensor from outside so that
    # all the lstm variants can use it.
    input_shape = common_layers.shape_list(features["inputs_raw"])
    flat_input = tf.reshape(features["inputs_raw"],
                            [input_shape[0], input_shape[1]])
    inputs_length = tf.reduce_sum(tf.minimum(flat_input, 1), -1)
    target_shape = common_layers.shape_list(features["targets_raw"])
    flat_target = tf.reshape(features["targets_raw"],
                             [target_shape[0], target_shape[1]])
    targets_length = tf.reduce_sum(tf.minimum(flat_target, 1), -1)
    tf.logging.info(self._hparams)
    return lstm_seq2seq_internal_attention(
        features["inputs"], features["targets"], self._hparams, train,
        inputs_length, targets_length) 
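A note on the length computation above: it assumes padding positions hold token id 0 and real tokens have positive ids, so tf.minimum(ids, 1) is a 0/1 non-padding indicator and its sum over the time axis is the sequence length. A standalone sketch of the same trick (toy ids, assumed pad id 0):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

ids = tf.constant([[7, 2, 9, 0, 0],
                   [4, 0, 0, 0, 0]])             # two rows padded to length 5
lengths = tf.reduce_sum(tf.minimum(ids, 1), -1)  # non-padding count per row
with tf.Session() as sess:
  print(sess.run(lengths))  # [3 1]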
Example #2
Source File: transformer_nat.py    From tensor2tensor with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
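The dist computation relies on the identity ||x - m||^2 = ||x||^2 + ||m||^2 - 2<x, m>, which turns all pairwise squared distances into a single matmul plus two reduce_sum calls. A NumPy check of that identity (toy shapes, not the tensor2tensor API):

import numpy as np

x = np.random.randn(4, 8)        # 4 vectors of depth 8
means = np.random.randn(16, 8)   # 16 codebook entries

x_norm_sq = np.sum(x ** 2, axis=-1, keepdims=True)          # [4, 1]
means_norm_sq = np.sum(means ** 2, axis=-1, keepdims=True)  # [16, 1]
dist = x_norm_sq + means_norm_sq.T - 2 * x @ means.T        # [4, 16]

direct = ((x[:, None, :] - means[None, :, :]) ** 2).sum(-1)  # pair by pair
assert np.allclose(dist, direct)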
Example #3
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def summarize_features(features, num_shards=1):
  """Generate summaries for features."""
  if not common_layers.should_generate_summaries():
    return

  with tf.name_scope("input_stats"):
    for (k, v) in sorted(six.iteritems(features)):
      if (isinstance(v, tf.Tensor) and (v.get_shape().ndims > 1) and
          (v.dtype != tf.string)):
        tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards)
        tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
        nonpadding = tf.to_float(tf.not_equal(v, 0))
        nonpadding_tokens = tf.reduce_sum(nonpadding)
        tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
        tf.summary.scalar("%s_nonpadding_fraction" % k,
                          tf.reduce_mean(nonpadding)) 
Example #4
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, num_experts, gates):
    """Create a SparseDispatcher.

    Args:
      num_experts: an integer.
      gates: a `Tensor` of shape `[batch_size, num_experts]`.

    Returns:
      a SparseDispatcher
    """
    self._gates = gates
    self._num_experts = num_experts

    where = tf.to_int32(tf.where(tf.transpose(gates) > 0))
    self._expert_index, self._batch_index = tf.unstack(where, num=2, axis=1)
    self._part_sizes_tensor = tf.reduce_sum(tf.to_int32(gates > 0), [0])
    self._nonzero_gates = tf.gather(
        tf.reshape(self._gates, [-1]),
        self._batch_index * num_experts + self._expert_index) 
Example #5
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def cv_squared(x):
  """The squared coefficient of variation of a sample.

  Useful as a loss to encourage a positive distribution to be more uniform.
  Epsilons added for numerical stability.
  Returns 0 for an empty Tensor.

  Args:
    x: a `Tensor`.

  Returns:
    a `Scalar`.
  """
  epsilon = 1e-10
  float_size = tf.to_float(tf.size(x)) + epsilon
  mean = tf.reduce_sum(x) / float_size
  variance = tf.reduce_sum(tf.squared_difference(x, mean)) / float_size
  return variance / (tf.square(mean) + epsilon) 
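Since cv_squared is variance over squared mean, it is 0 for a perfectly uniform positive vector and grows as the load becomes more unbalanced, which is what makes it useful as a load-balancing loss for mixtures of experts. A quick numeric check (epsilons dropped for clarity):

import numpy as np

def cv_squared_np(x):
  return x.var() / x.mean() ** 2

print(cv_squared_np(np.array([1., 1., 1., 1.])))  # 0.0: perfectly uniform
print(cv_squared_np(np.array([4., 0., 0., 0.])))  # 3.0: all load on one expert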
Example #6
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def one_hot_class_label_loss(top_out,
                             targets,
                             model_hparams,
                             vocab_size,
                             weights_fn):
  """Apply softmax cross-entropy between outputs and targets.

  Args:
    top_out: logits Tensor with shape [batch, ?, ?, num_classes]
    targets: one-hot encoding Tensor with shape [batch, ?, ?, num_classes]
    model_hparams: HParams, model hyperparameters.
    vocab_size: int, vocabulary size.
    weights_fn:

  Returns:
    loss_scale (cross-entropy), loss_denom
  """
  del model_hparams, vocab_size  # unused arg
  loss_scale = tf.losses.softmax_cross_entropy(
      onehot_labels=targets, logits=top_out)
  weights = weights_fn(targets)
  loss_denom = tf.reduce_sum(weights)
  return loss_scale, loss_denom 
Example #7
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def kl_divergence(mu, log_var, mu_p=0.0, log_var_p=0.0):
  """KL divergence of diagonal gaussian N(mu,exp(log_var)) and N(0,1).

  Args:
    mu: mu parameter of the distribution.
    log_var: log(var) parameter of the distribution.
    mu_p: optional mu from a learned prior distribution
    log_var_p: optional log(var) from a learned prior distribution
  Returns:
    the KL loss.
  """

  batch_size = shape_list(mu)[0]
  prior_distribution = tfp.distributions.Normal(
      mu_p, tf.exp(tf.multiply(0.5, log_var_p)))
  posterior_distribution = tfp.distributions.Normal(
      mu, tf.exp(tf.multiply(0.5, log_var)))
  kld = tfp.distributions.kl_divergence(posterior_distribution,
                                        prior_distribution)
  return tf.reduce_sum(kld) / to_float(batch_size) 
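For the default prior (mu_p=0, log_var_p=0) the same quantity has the familiar closed form 0.5 * (exp(log_var) + mu^2 - 1 - log_var), summed over dimensions and, as above, divided by the batch size. A NumPy sketch of that formula:

import numpy as np

def kl_to_standard_normal(mu, log_var):
  # KL(N(mu, exp(log_var)) || N(0, 1)), summed over all dimensions.
  return 0.5 * np.sum(np.exp(log_var) + mu ** 2 - 1.0 - log_var)

print(kl_to_standard_normal(np.array([0.5, -0.3]), np.array([0.1, -0.2])))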
Example #8
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def multi_label_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  """Average loss over the labels."""
  del vocab_size  # unused arg
  logits = top_out
  num_labels = tf.shape(targets)[1]
  logits = tf.tile(logits, [1, num_labels, 1, 1, 1])

  xent, weights = common_layers.padded_cross_entropy(
      logits,
      targets,
      model_hparams.label_smoothing,
      weights_fn=weights_fn,
      reduce_sum=False,
  )
  xent = tf.squeeze(xent, [2, 3])
  weights = tf.squeeze(weights, [2, 3])
  # average loss over all labels
  loss = tf.reduce_sum(xent, axis=1)
  weights = tf.reduce_sum(weights, axis=1)
  loss /= (weights + 1e-8)
  weights = tf.to_float(tf.greater(weights, 0.))

  return tf.reduce_sum(loss*weights), tf.reduce_sum(weights) 
Example #9
Source File: transformer_glow_layers.py    From tensor2tensor with Apache License 2.0
def actnorm(name, x, x_mask, inverse, init, logscale_factor=3.0):
  """Activation normalization, returns logabsdet of shape [B]."""
  eps = tf.keras.backend.epsilon()
  n_channels = common_layers.shape_list(x)[2]

  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    x_mean, x_var = gops.moments_over_bl(x, x_mask)
    b = gops.get_variable_ddi(
        "b", (n_channels), -x_mean, init, tf.zeros_initializer)
    log_w_init = -0.5 * tf.log(x_var + eps) / logscale_factor
    log_w = gops.get_variable_ddi(
        "log_w", (n_channels), log_w_init, init,
        tf.zeros_initializer) * logscale_factor

    if not inverse:
      x = (x + b) * tf.exp(log_w)
    else:
      x = x * tf.exp(-log_w) - b

    x_length = tf.reduce_sum(x_mask, -1)
    logabsdet = x_length * tf.reduce_sum(log_w)
    if inverse:
      logabsdet *= -1
    return x, logabsdet 
Example #10
Source File: common_layers_test.py    From tensor2tensor with Apache License 2.0
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    channels = 3
    pred = tf.random_normal([batch, height, width, num_mixtures * 10])
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=0, maxval=256, dtype=tf.int32)
    actual_loss_num, actual_loss_den = common_layers.dml_loss(
        pred=pred, labels=labels, reduce_sum=reduce_sum)
    actual_loss = actual_loss_num / actual_loss_den

    real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
    expected_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=real_labels) / channels
    if reduce_sum:
      expected_loss = tf.reduce_mean(expected_loss)

    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val) 
Example #11
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def padded_accuracy_topk(predictions,
                         labels,
                         k,
                         weights_fn=common_layers.weights_nonzero):
  """Percentage of times that top-k predictions matches labels on non-0s."""
  with tf.variable_scope("padded_accuracy_topk", values=[predictions, labels]):
    padded_predictions, padded_labels = common_layers.pad_with_zeros(
        predictions, labels)
    weights = weights_fn(padded_labels)
    effective_k = tf.minimum(k,
                             common_layers.shape_list(padded_predictions)[-1])
    _, outputs = tf.nn.top_k(padded_predictions, k=effective_k)
    outputs = tf.to_int32(outputs)
    padded_labels = tf.to_int32(padded_labels)
    padded_labels = tf.expand_dims(padded_labels, axis=-1)
    padded_labels += tf.zeros_like(outputs)  # Pad to same shape.
    same = tf.to_float(tf.equal(outputs, padded_labels))
    same_topk = tf.reduce_sum(same, axis=-1)
    return same_topk, weights 
Example #12
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_sum_over_lc(x, x_mask):
  """Returns sum of x (over L and C) given the actual length and pad.

  Args:
    x: input. (B,L,C)
    x_mask: binary padding mask. (B,L)

  Returns:
    sum of x. (B)
  """

  if x.shape.rank == 3 and x_mask.shape.rank == 2:
    x_mask = x_mask[..., tf.newaxis]
  else:
    tf.logging.info("x: {}, x_mask: {}".format(x.shape.rank, x_mask.shape.rank))
    raise ValueError("Dimension not supported.")

  masked_x = x * x_mask
  return tf.reduce_sum(masked_x, axis=[1, 2])  # sum over L, C
Example #13
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_mean_over_bl(x, x_mask):
  """Returns average of x (over B and L) given the actual length and pad.

  Args:
    x: input. (B,L,C)
    x_mask: binary padding mask. (B,L)

  Returns:
    mean of x. (C)
  """

  if x.shape.rank == 3 and x_mask.shape.rank == 2:
    x_mask = x_mask[..., tf.newaxis]
  else:
    tf.logging.info("x: {}, x_mask: {}".format(x.shape.rank, x_mask.shape.rank))
    raise ValueError("Dimension not supported.")

  masked_x = x * x_mask
  total = tf.reduce_sum(masked_x, axis=[0, 1])  # sum over B, L
  return total / tf.reduce_sum(x_mask)
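The masked reductions in these helpers all follow one pattern: zero out padded positions with the mask, sum, and (for means) divide by the mask sum rather than by the padded size. A compact sketch of the mean over B and L (toy shapes B=2, L=3, C=1):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([[[1.], [2.], [3.]],
                 [[4.], [5.], [6.]]])   # (B, L, C)
x_mask = tf.constant([[1., 1., 0.],
                      [1., 0., 0.]])    # second row has true length 1
masked_x = x * x_mask[..., tf.newaxis]
mean_bl = tf.reduce_sum(masked_x, axis=[0, 1]) / tf.reduce_sum(x_mask)
with tf.Session() as sess:
  print(sess.run(mean_bl))  # [(1 + 2 + 4) / 3] = [2.333...]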
Example #14
Source File: ppo.py    From tensor2tensor with Apache License 2.0
def _distributional_to_value(value_d, size, subscale, threshold):
  """Get a scalar value out of a value distribution in distributional RL."""
  half = size // 2
  value_range = (tf.to_float(tf.range(-half, half)) + 0.5) * subscale
  probs = tf.nn.softmax(value_d)

  if threshold == 0.0:
    return tf.reduce_sum(probs * value_range, axis=-1)

  # accumulated_probs[..., i] is the sum of probabilities in buckets up to i,
  # so it is the probability that value <= the i-th bucket value.
  accumulated_probs = tf.cumsum(probs, axis=-1)
  # New probs are 0 on all lower buckets, until the threshold
  probs = tf.where(accumulated_probs < threshold, tf.zeros_like(probs), probs)
  probs /= tf.reduce_sum(probs, axis=-1, keepdims=True)  # Re-normalize.
  return tf.reduce_sum(probs * value_range, axis=-1) 
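With threshold == 0.0 the function returns the plain expectation of the bucketed distribution; with a positive threshold, the lower tail up to that cumulative probability is dropped and the remainder re-normalized, yielding a pessimistic value estimate. The same arithmetic in NumPy (toy distribution, assumed size=4, subscale=1.0, threshold=0.25):

import numpy as np

size, subscale, threshold = 4, 1.0, 0.25
half = size // 2
value_range = (np.arange(-half, half) + 0.5) * subscale  # [-1.5 -0.5 0.5 1.5]

probs = np.array([0.1, 0.2, 0.3, 0.4])  # already normalized for the demo
print(np.sum(probs * value_range))      # plain expectation: 0.5

accumulated = np.cumsum(probs)
probs = np.where(accumulated < threshold, 0.0, probs)
probs /= probs.sum()                    # re-normalize after dropping the tail
print(np.sum(probs * value_range))      # pessimistic value: ~0.722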
Example #15
Source File: base.py    From tensor2tensor with Apache License 2.0
def pixels_from_softmax(frame_logits, pure_sampling=False,
                        temperature=1.0, gumbel_noise_factor=0.2):
  """Given frame_logits from a per-pixel softmax, generate colors."""
  # If we're purely sampling, just sample each pixel.
  if pure_sampling or temperature == 0.0:
    return common_layers.sample_with_temperature(frame_logits, temperature)

  # Gumbel-sample from the pixel softmax and average over pixel values.
  pixel_range = tf.to_float(tf.range(256))
  for _ in range(len(frame_logits.get_shape().as_list()) - 1):
    pixel_range = tf.expand_dims(pixel_range, axis=0)

  frame_logits = tf.nn.log_softmax(frame_logits)
  gumbel_samples = discretization.gumbel_sample(
      common_layers.shape_list(frame_logits)) * gumbel_noise_factor

  frame = tf.nn.softmax((frame_logits + gumbel_samples) / temperature, axis=-1)
  result = tf.reduce_sum(frame * pixel_range, axis=-1)
  # Round on the forward pass, not on the backward one.
  return result + tf.stop_gradient(tf.round(result) - result) 
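The last line is a straight-through rounding estimator: the forward value equals tf.round(result), but because the rounding correction sits inside tf.stop_gradient, the backward pass sees only the identity on result. A minimal sketch of the pattern:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant([0.2, 1.7, 3.4])
rounded_st = x + tf.stop_gradient(tf.round(x) - x)
grad = tf.gradients(tf.reduce_sum(rounded_st), x)[0]
with tf.Session() as sess:
  print(sess.run(rounded_st))  # [0. 2. 3.]: rounded forward values
  print(sess.run(grad))        # [1. 1. 1.]: identity gradient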
Example #16
Source File: dyneval.py    From lamb with Apache License 2.0
def _make_update(self):
    mss = []
    gsum = 0.0
    count = 0
    for sum_squared_grads in self._sum_squared_grads:
      ms = tf.sqrt(sum_squared_grads / self._num_squared_grads)
      gsum += tf.reduce_sum(ms)
      count += tf.reduce_sum(tf.ones_like(ms))
      mss.append(ms)
    gsum = gsum / count

    assignments = []
    for grad, var, save, sum_squared_grads, ms in zip(
        self._grads, self._vars, self._saves, self._sum_squared_grads, mss):
      decay_rate = tf.minimum(1.0, self._decay_rate*(ms/gsum))
      delta = (-self._learning_rate*grad / (ms + self._epsilon) +
               decay_rate*(save-var))
      assignments.append(var.assign_add(delta))
    return tf.group(assignments) 
Example #17
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_sum_over_l(x, x_mask):
  """Returns sum of x (over L) given the actual length and pad.

  Args:
    x: input. (B,L,C)
    x_mask: binary padding mask. (B,L)

  Returns:
    sum of x. (B,C)
  """

  if x.shape.rank == 3 and x_mask.shape.rank == 2:
    x_mask = x_mask[..., tf.newaxis]
  else:
    tf.logging.info("x: {}, x_mask: {}".format(x.shape.rank, x_mask.shape.rank))
    raise ValueError("Dimension not supported.")

  masked_x = x * x_mask
  return tf.reduce_sum(masked_x, axis=-2)  # sum over L
Example #18
Source File: dropout.py    From lamb with Apache License 2.0
def _build(self, x, state):
    prev_keep_mask = state
    shape = tf.shape(x)
    noise = tf.random_uniform(shape, dtype=x.dtype)
    other_mask = tf.floor(self._keep_prob + noise)
    choice_noise = tf.random_uniform(shape, dtype=x.dtype)
    choice = tf.less(choice_noise, self._flip_prob)
    # KLUDGE(melisgl): The client has to pass the last keep_mask from
    # a batch to the next so the mask may end up next to some
    # recurrent cell state. This state is often zero at the beginning
    # and may be periodically zeroed (per example) during training.
    # While zeroing LSTM state is okay, zeroing the dropout mask is
    # not. So instead of forcing every client to deal with this common
    # (?) case, if an all zero mask is detected, then regenerate a
    # fresh mask. This is of course a major hack and won't help with
    # learnt initial states, for example.
    sum_ = tf.reduce_sum(prev_keep_mask, 1, keepdims=True)
    is_initializing = tf.equal(sum_, 0.0)

    self._keep_mask = tf.where(tf.logical_or(choice, is_initializing),
                               other_mask,
                               prev_keep_mask)
    self._time_step += 1
    return x * self._keep_mask / self._keep_prob * self._scaler 
Example #19
Source File: message_passing_attention.py    From tensor2tensor with Apache License 2.0
def compute_values(edge_compatibility, v):
  """Compute values. If edge compatibilities is just adjacency, we get ggnn.

  Args:
    edge_compatibility: A tensor of shape [batch, num_transforms, length, depth]
    v: A tensor of shape [batch, num_transforms, length, depth]

  Returns:
    output: A [batch, length, depth] tensor
  """

  # Computes the incoming value vectors for each node by weighting them
  # according to the attention weights. These values are still segregated by
  # edge type.
  # Shape = [B, T, N, V].
  all_edge_values = tf.matmul(tf.to_float(edge_compatibility), v)

  # Combines the weighted value vectors together across edge types into a
  # single N x V matrix for each batch.
  output = tf.reduce_sum(all_edge_values, axis=1)  # Shape [B, N, V].
  return output 
Example #20
Source File: discretization.py    From tensor2tensor with Apache License 2.0
def top_k_softmax(x, k):
  """Calculate softmax(x), select top-k and rescale to sum to 1.

  Args:
    x: Input to softmax over.
    k: Number of top-k to select.

  Returns:
    softmax(x) and maximum item.
  """
  x = tf.nn.softmax(x)
  top_x, _ = tf.nn.top_k(x, k=k + 1)
  min_top = tf.reduce_min(top_x, axis=-1, keepdims=True)
  x = tf.nn.relu((x - min_top) + 1e-12)
  x /= tf.reduce_sum(x, axis=-1, keepdims=True)
  return x, tf.reduce_max(top_x, axis=-1) 
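Note that the function takes the top k+1 values, so min_top is the (k+1)-th largest probability; subtracting it and applying relu suppresses everything outside the top k before re-normalizing. A usage sketch, assuming top_k_softmax as defined above is in scope:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

logits = tf.constant([[2.0, 1.0, 0.1, -1.0]])
topk_probs, max_prob = top_k_softmax(logits, k=2)
with tf.Session() as sess:
  probs, _ = sess.run([topk_probs, max_prob])
  print(probs)        # two dominant entries, the rest ~1e-12
  print(probs.sum())  # ~1.0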
Example #21
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def real_log_poisson_loss(top_out,
                          targets,
                          model_hparams,
                          vocab_size,
                          weights_fn):
  """Poisson loss for real."""
  del model_hparams, vocab_size  # unused arg
  predictions = top_out
  if (len(common_layers.shape_list(top_out)) != len(
      common_layers.shape_list(targets))):
    predictions = tf.squeeze(top_out, axis=[-1])
  with tf.name_scope("log_poisson"):
    weights = weights_fn(targets)
    lp_loss = tf.nn.log_poisson_loss(targets, predictions)
    return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights) 
Example #22
Source File: transformer_glow_layers_ops.py    From tensor2tensor with Apache License 2.0
def reduce_mean_over_l(x, x_mask):
  """Returns mean of x (over L) given the actual length and pad."""
  return reduce_sum_over_l(x, x_mask) / tf.reduce_sum(x_mask, 1, keepdims=True) 
Example #23
Source File: common_layers_test.py    From tensor2tensor with Apache License 2.0
def testDiscretizedMixLogisticLoss(self):
    batch = 2
    height = 4
    width = 4
    channels = 3
    num_mixtures = 5
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.random_uniform([batch, height, width, num_mixtures * 3],
                                   minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=-.9, maxval=.9)
    locs_0 = locs[..., :3]
    log_scales_0 = log_scales[..., :3]
    centered_labels = labels - locs_0
    inv_stdv = tf.exp(-log_scales_0)
    plus_in = inv_stdv * (centered_labels + 1. / 255.)
    min_in = inv_stdv * (centered_labels - 1. / 255.)
    cdf_plus = tf.nn.sigmoid(plus_in)
    cdf_min = tf.nn.sigmoid(min_in)
    expected_loss = -tf.reduce_sum(tf.log(cdf_plus - cdf_min), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5) 
Example #24
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def video_l1_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  """Compute loss numerator and denominator for one shard of output."""
  del vocab_size  # unused arg
  logits = top_out
  logits = tf.reshape(logits, [-1] + common_layers.shape_list(logits)[2:-1])
  targets = tf.reshape(targets, [-1] + common_layers.shape_list(targets)[2:])
  weights = weights_fn(targets)
  # Shift targets by 0.5 so later just casting to int gives the prediction.
  # So for int targets, say 0 and 7, we actually train to predict 0.5 and 7.5.
  # Later (in metrics or infer) this is cast to int anyway. Also, we have no
  # loss beyond cutoff = 0.2 as these are already correct predictions.
  targets = tf.to_float(targets) + 0.5
  loss = video_l1_internal_loss(logits, targets, model_hparams)
  return tf.reduce_sum(loss * weights), tf.reduce_sum(weights) 
Example #25
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def dml_loss(pred, labels, weights_fn=_weights_one_third, reduce_sum=True):
  """Discretized mixture of logistics loss.

  Args:
    pred: A [batch, height, width, num_mixtures*10] tensor of floats
      comprising one unconstrained mixture probability, three means
      (one per channel), three standard deviations (one per channel),
      and three coefficients which linearly parameterize dependence across
      channels.
    labels: A [batch, height, width, channels] tensor of 8-bit pixel
      intensities. The computation assumes channels is 3.
    weights_fn: A function of labels, returning a Tensor of shape
      [batch, height, width] which weights each loss term. Default is to scale
      each loss term by 1/3 so that they capture the average across channels.
    reduce_sum: A boolean, to return scalar loss instead of per position.

  Returns:
    Tuple of loss tensors for numerator and denominator, each a scalar if
    reduce_sum else of shape [batch, height, width]. Dividing the numerator by
    the denominator gives the number of nats per pixel in labels.
  """
  real_labels = convert_rgb_to_symmetric_real(labels)
  dml_loss_value = discretized_mix_logistic_loss(pred=pred, labels=real_labels)
  weights = weights_fn(labels)
  loss_num = weights * dml_loss_value
  loss_den = weights_nonzero(weights)
  if reduce_sum:
    loss_num = tf.reduce_sum(loss_num)
    loss_den = tf.reduce_sum(loss_den)
  return loss_num, loss_den 
Example #26
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def sigmoid_class_label_loss(top_out,
                             targets,
                             model_hparams,
                             vocab_size,
                             weights_fn):
  """Loss for class label."""
  # Expect inputs of size [batch-size, timesteps, 1, num-classes], where the
  # last dimension of num-classes represents logits for binary labels.
  del model_hparams, vocab_size  # unused arg
  loss_scale = tf.losses.sigmoid_cross_entropy(
      multi_class_labels=targets, logits=top_out)
  weights = weights_fn(targets)
  loss_denom = tf.reduce_sum(weights)
  return loss_scale, loss_denom 
Example #27
Source File: modalities.py    From tensor2tensor with Apache License 2.0
def real_l2_loss(top_out, targets, model_hparams, vocab_size, weights_fn):
  del model_hparams, vocab_size  # unused arg
  predictions = top_out
  if (len(common_layers.shape_list(top_out)) != len(
      common_layers.shape_list(targets))):
    predictions = tf.squeeze(top_out, axis=[-1])
  with tf.name_scope("l2"):
    weights = weights_fn(targets)
    l2 = tf.pow(predictions - targets, 2)
    return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) 
Example #28
Source File: base.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    self.has_actions = "input_action" in features
    self.has_rewards = "target_reward" in features
    self.has_policies = "target_policy" in features
    self.has_values = "target_value" in features
    hparams = self.hparams

    def merge(inputs, targets):
      """Split inputs and targets into lists."""
      inputs = tf.unstack(inputs, axis=1)
      targets = tf.unstack(targets, axis=1)
      assert len(inputs) == hparams.video_num_input_frames
      assert len(targets) == hparams.video_num_target_frames
      return inputs + targets

    frames = merge(features["inputs"], features["targets"])
    frames_raw = merge(features["inputs_raw"], features["targets_raw"])
    actions, rewards = None, None
    if self.has_actions:
      actions = merge(features["input_action"], features["target_action"])
    if self.has_rewards:
      rewards = merge(features["input_reward"], features["target_reward"])

    # Reset the internal states if reset_internal_states has been
    # passed as a feature and has a value greater than 0.
    if self.is_recurrent_model and self.internal_states is not None:
      def reset_func():
        reset_ops = flat_lists(self.reset_internal_states_ops())
        with tf.control_dependencies(reset_ops):
          return tf.no_op()
      if self.is_predicting and "reset_internal_states" in features:
        reset = features["reset_internal_states"]
        reset = tf.greater(tf.reduce_sum(reset), 0.5)
        reset_ops = tf.cond(reset, reset_func, tf.no_op)
      else:
        reset_ops = tf.no_op()
      with tf.control_dependencies([reset_ops]):
        frames[0] = tf.identity(frames[0])

    with tf.control_dependencies([frames[0]]):
      return self.__process(frames, actions, rewards, frames_raw) 
Example #29
Source File: epva.py    From tensor2tensor with Apache License 2.0
def mean_squared_error(true, pred):
  """L2 distance between tensors true and pred.

  Args:
    true: the ground truth image.
    pred: the predicted image.
  Returns:
    mean squared error between ground truth and predicted image.
  """
  result = tf.reduce_sum(
      tf.squared_difference(true, pred)) / tf.to_float(tf.size(pred))
  return result 
Example #30
Source File: epva.py    From tensor2tensor with Apache License 2.0
def l1_error(true, pred):
  """L1 distance between tensors true and pred."""
  return tf.reduce_sum(tf.abs(true - pred)) / tf.to_float(tf.size(pred))