Python tensorflow.compat.v1.Tensor() Examples

The following are 30 code examples of tensorflow.compat.v1.Tensor(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
Example #1
Source File: attention_lm_moe.py    From tensor2tensor with Apache License 2.0
def expand_batch_coordinates(bc, length_factor):
  """Duplicate elements of bc by length_factor.

  Args:
    bc (tf.Tensor): int32 tensor of shape [1, length, 1]
    length_factor (int):

  Returns:
    tf.Tensor: of shape [1, length*length_factor, 1] where every element has
      been duplicated length_factor times.
  """
  assert bc.get_shape().as_list() == [1, None, 1]
  # bc has shape [1, length, 1]
  bc *= tf.constant([[1] * length_factor])
  # bc has shape [1, length, length_factor]
  bc = tf.reshape(bc, [1, -1, 1])
  # bc has shape [1, length*length_factor, 1]
  return bc 
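A minimal, self-contained sketch (not from the tensor2tensor source) of the broadcasting trick above on a concrete tensor, assuming eager execution is available through tf.compat.v1:

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()  # no-op if eager is already on

bc = tf.constant([[[0], [1], [2]]], dtype=tf.int32)  # shape [1, 3, 1]
expanded = bc * tf.constant([[1, 1]])                # broadcast to [1, 3, 2]
expanded = tf.reshape(expanded, [1, -1, 1])          # shape [1, 6, 1]
print(expanded[:, :, 0].numpy())                     # [[0 0 1 1 2 2]]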
Example #2
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, num_experts, gates):
    """Create a SparseDispatcher.

    Args:
      num_experts: an integer.
      gates: a `Tensor` of shape `[batch_size, num_experts]`.

    Returns:
      a SparseDispatcher
    """
    self._gates = gates
    self._num_experts = num_experts

    where = tf.to_int32(tf.where(tf.transpose(gates) > 0))
    self._expert_index, self._batch_index = tf.unstack(where, num=2, axis=1)
    self._part_sizes_tensor = tf.reduce_sum(tf.to_int32(gates > 0), [0])
    self._nonzero_gates = tf.gather(
        tf.reshape(self._gates, [-1]),
        self._batch_index * num_experts + self._expert_index) 
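A small eager sketch (illustrative values, not from the repo) of the bookkeeping above, showing how a concrete gates matrix is turned into (expert, batch) index pairs and per-pair gate values:

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

gates = tf.constant([[0.7, 0.3],    # example 0 goes to experts 0 and 1
                     [0.0, 0.5]])   # example 1 goes to expert 1 only
where = tf.to_int32(tf.where(tf.transpose(gates) > 0))
expert_index, batch_index = tf.unstack(where, num=2, axis=1)
print(expert_index.numpy())   # [0 1 1]
print(batch_index.numpy())    # [0 0 1]
nonzero_gates = tf.gather(tf.reshape(gates, [-1]),
                          batch_index * 2 + expert_index)
print(nonzero_gates.numpy())  # [0.7 0.3 0.5]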
Example #3
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def combine(self, expert_out, multiply_by_gates=True):
    """Sum together the expert output, weighted by the gates.

    The slice corresponding to a particular batch element `b` is computed
    as the sum over all experts `i` of the expert output, weighted by the
    corresponding gate values.  If `multiply_by_gates` is set to False, the
    gate values are ignored.

    Args:
      expert_out: a list of `num_experts` `Tensor`s, each with shape
        `[expert_batch_size_i, <extra_output_dims>]`.
      multiply_by_gates: a boolean

    Returns:
      a `Tensor` with shape `[batch_size, <extra_output_dims>]`.
    """
    # see comments on convert_gradient_to_tensor
    stitched = common_layers.convert_gradient_to_tensor(
        tf.concat(expert_out, 0))
    if multiply_by_gates:
      stitched *= tf.expand_dims(self._nonzero_gates, 1)
    combined = tf.unsorted_segment_sum(stitched, self._batch_index,
                                       tf.shape(self._gates)[0])
    return combined 
Example #4
Source File: model_tf1.py    From machine-learning-for-programming-samples with MIT License
def compute_logits(self, token_ids: tf.Tensor) -> tf.Tensor:
        """
        Implements a language model, where each output is conditional on the current
        input and inputs processed so far.

        Args:
            token_ids: int32 tensor of shape [B, T], storing integer IDs of tokens.

        Returns:
            tf.float32 tensor of shape [B, T, V], storing the distribution over output symbols
            for each timestep for each batch element.
        """
        # TODO 5# 1) Embed tokens
        # TODO 5# 2) Run RNN on embedded tokens
        # TODO 5# 3) Project RNN outputs onto the vocabulary to obtain logits.
        return rnn_output_logits 
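The body above is intentionally left as an exercise in the original sample repository. A hedged sketch of the three steps, written as a standalone graph-mode function with hypothetical hyperparameters (vocab_size, embed_dim, hidden_dim) rather than the authors' solution, might look like this:

import tensorflow.compat.v1 as tf

def compute_logits_sketch(token_ids, vocab_size=10000, embed_dim=64, hidden_dim=128):
  # 1) Embed tokens.
  embeddings = tf.get_variable("embeddings", [vocab_size, embed_dim])
  embedded = tf.nn.embedding_lookup(embeddings, token_ids)              # [B, T, embed_dim]
  # 2) Run an RNN on the embedded tokens.
  cell = tf.nn.rnn_cell.LSTMCell(hidden_dim)
  rnn_outputs, _ = tf.nn.dynamic_rnn(cell, embedded, dtype=tf.float32)  # [B, T, hidden_dim]
  # 3) Project RNN outputs onto the vocabulary to obtain logits.
  return tf.layers.dense(rnn_outputs, vocab_size)                       # [B, T, vocab_size]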
Example #5
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def cv_squared(x):
  """The squared coefficient of variation of a sample.

  Useful as a loss to encourage a positive distribution to be more uniform.
  Epsilons added for numerical stability.
  Returns 0 for an empty Tensor.

  Args:
    x: a `Tensor`.

  Returns:
    a `Scalar`.
  """
  epsilon = 1e-10
  float_size = tf.to_float(tf.size(x)) + epsilon
  mean = tf.reduce_sum(x) / float_size
  variance = tf.reduce_sum(tf.squared_difference(x, mean)) / float_size
  return variance / (tf.square(mean) + epsilon) 
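A quick eager check (not from the repo, assuming cv_squared above is in scope) of why this works as a load-balancing loss: a uniform sample gives a value near zero, while a skewed one gives a large value.

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

print(cv_squared(tf.constant([1.0, 1.0, 1.0, 1.0])).numpy())  # ~0.0
print(cv_squared(tf.constant([4.0, 0.0, 0.0, 0.0])).numpy())  # ~3.0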
Example #6
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def restore(self, x):
    """Add padding back to the given tensor.

    Args:
      x (tf.Tensor): of shape [dim_compressed,...]

    Returns:
      a tensor of shape [dim_origin,...] with dim_origin >= dim_compressed. The
      dim is restored from the original reference tensor.
    """
    with tf.name_scope("pad_reduce/restore"):
      x = tf.scatter_nd(
          indices=self.nonpad_ids,
          updates=x,
          shape=tf.concat([self.dim_origin, tf.shape(x)[1:]], axis=0),
      )
    return x 
Example #7
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def remove(self, x):
    """Remove padding from the given tensor.

    Args:
      x (tf.Tensor): of shape [dim_origin,...]

    Returns:
      a tensor of shape [dim_compressed,...] with dim_compressed <= dim_origin
    """
    with tf.name_scope("pad_reduce/remove"):
      x_shape = x.get_shape().as_list()
      x = tf.gather_nd(
          x,
          indices=self.nonpad_ids,
      )
      if not tf.executing_eagerly():
        # This is a hack: for some reason, gather_nd returns a tensor of
        # undefined shape, so the shape is set manually.
        x.set_shape([None] + x_shape[1:])
    return x 
Example #8
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, pad_mask):
    """Compute and store the location of the padding.

    Args:
      pad_mask (tf.Tensor): Reference padding tensor of shape
        [batch_size,length] or [dim_origin] (dim_origin=batch_size*length)
        containing non-zero positive values to indicate padding locations.
    """
    self.nonpad_ids = None
    self.dim_origin = None

    with tf.name_scope("pad_reduce/get_ids"):
      pad_mask = tf.reshape(pad_mask, [-1])  # Flatten the batch
      # nonpad_ids contains the coordinates of zero rows (as pad_mask is
      # float32, zero equality is checked with |x| < epsilon, with
      # epsilon=1e-9 as standard; here pad_mask only contains positive values,
      # so tf.abs would be redundant)
      self.nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))
      self.dim_origin = tf.shape(pad_mask)[:1] 
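A hedged round-trip sketch (not from the repo) tying the three PadRemover methods above together; it assumes tensor2tensor is installed and runs in graph mode:

import tensorflow.compat.v1 as tf
from tensor2tensor.utils import expert_utils
tf.disable_eager_execution()

x = tf.constant([[1., 2.], [0., 0.], [3., 4.]])  # middle row is padding
pad_mask = tf.to_float(tf.equal(tf.reduce_sum(tf.abs(x), axis=-1), 0.0))
remover = expert_utils.PadRemover(pad_mask)
compressed = remover.remove(x)           # [2, 2]: the padding row is dropped
restored = remover.restore(compressed)   # [3, 2]: zeros are re-inserted
with tf.Session() as sess:
  print(sess.run([compressed, restored]))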
Example #9
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def weights_multi_problem(labels, taskid=-1):
  """Assign weight 1.0 to only the "targets" portion of the labels.

  Weight 1.0 is assigned to all labels past the taskid.

  Args:
    labels: A Tensor of int32s.
    taskid: an int32 representing the task id for a problem.

  Returns:
    A Tensor of floats.

  Raises:
    ValueError: The Task ID must be valid.
  """
  taskid = check_nonnegative(taskid)
  past_taskid = tf.cumsum(to_float(tf.equal(labels, taskid)), axis=1)
  # Additionally zero out the task id location
  past_taskid *= to_float(tf.not_equal(labels, taskid))
  non_taskid = to_float(labels)
  return to_float(tf.not_equal(past_taskid * non_taskid, 0)) 
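A quick worked example (values chosen for illustration), assuming tensor2tensor's common_layers is importable and eager execution is on:

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()
from tensor2tensor.layers import common_layers

labels = tf.constant([[5, 7, 9, 3, 4, 0]], dtype=tf.int32)  # 9 is the task id
weights = common_layers.weights_multi_problem(labels, taskid=9)
print(weights.numpy())  # [[0. 0. 0. 1. 1. 0.]] -- only nonzero labels past the task id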
Example #10
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def _normal_distribution_cdf(x, stddev):
  """Evaluates the CDF of the normal distribution.

  Normal distribution with mean 0 and standard deviation stddev,
  evaluated at x=x.

  input and output `Tensor`s have matching shapes.

  Args:
    x: a `Tensor`
    stddev: a `Tensor` with the same shape as `x`.

  Returns:
    a `Tensor` with the same shape as `x`.

  """
  return 0.5 * (1.0 + tf.erf(x / (math.sqrt(2) * stddev + 1e-20))) 
Example #11
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, data_parallelism, expert_parallelism, gates):
    """Create a DistributedSparseDispatcher.

    Args:
      data_parallelism: a Parallelism object.
      expert_parallelism: a Parallelism object.
      gates: a list of data_parallelism.n `Tensor`s of shapes
        `[batch_size[d], num_experts]`.

    Returns:
      a DistributedSparseDispatcher
    """
    self._gates = gates
    self._dp = data_parallelism
    self._ep = expert_parallelism
    assert len(gates) == self._dp.n
    self._dispatchers = self._dp(SparseDispatcher, self._ep.n, gates) 
Example #12
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def dispatch(self, inp):
    """Create one input Tensor for each expert.

    Args:
      inp: a list of length num_datashards `Tensor`s with shapes
        `[batch_size[d], <extra_input_dims>]`.
    Returns:
      a list of `num_experts` `Tensor`s with shapes
        `[num_examples[i], <extra_input_dims>]`.
    """
    dispatched = self._dp(lambda a, b: a.dispatch(b), self._dispatchers, inp)
    ret = self._ep(tf.concat, transpose_list_of_lists(dispatched), 0)
    if ret[0].dtype == tf.float32:
      # see comments on common_layers.convert_gradient_to_tensor
      ret = self._ep(common_layers.convert_gradient_to_tensor, ret)
    return ret 
Example #13
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def get_timing_signal(length,
                      min_timescale=1,
                      max_timescale=1e4,
                      num_timescales=16):
  """Create Tensor of sinusoids of different frequencies.

  Args:
    length: Length of the Tensor to create, i.e. Number of steps.
    min_timescale: a float
    max_timescale: a float
    num_timescales: an int

  Returns:
    Tensor of shape (length, 2*num_timescales)
  """
  positions = to_float(tf.range(length))
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) 
Example #14
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def eval_autoregressive(self, features=None, decode_length=50):
    """Autoregressive eval.

    Quadratic time in decode_length.

    Args:
      features: a map of string to `Tensor`
      decode_length: an integer.  How many additional timesteps to decode.

    Returns:
      logits: `Tensor`
      losses: a dictionary: {loss-name (string): floating point `Scalar`}.
          Contains a single key "training".
    """
    results = self._slow_greedy_infer(features, decode_length=decode_length)
    return results["logits"], results["losses"] 
Example #15
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0
def combine(self, x):
    """Return the output from the experts.

    When one example goes to multiple experts, the outputs are summed.

    Args:
      x: a Tensor with shape [batch, num_experts, expert_capacity, depth]

    Returns:
      a `Tensor` with shape `[batch, length, depth]`
    """
    depth = tf.shape(x)[-1]
    x *= tf.expand_dims(self._nonpadding, -1)
    ret = tf.unsorted_segment_sum(
        x, self._flat_indices, num_segments=self._batch * self._length)
    ret = tf.reshape(ret, [self._batch, self._length, depth])
    return ret 
Example #16
Source File: attention_lm_moe.py    From tensor2tensor with Apache License 2.0
def remove_pad(x, pad_remover, mode):
  """Remove padding by concatenating all dimension into one.

  Args:
    x (tf.Tensor): input of shape [batch_size, length, depth]
    pad_remover (obj): a PadRemover object
    mode (ModeKeys): infer, train or eval. If inference, the padding remover is
      not applied

  Returns:
    tf.Tensor of shape [1,length_nonpad,depth] where
      length_nonpad <= batch_size*length
  """
  # Concatenate all tokens (without padding)
  x = expert_utils.flatten_all_but_last(x)

  # Remove padding for training and eval
  if mode != ModeKeys.PREDICT:
    # This is a hack to allow inference when the <go> token
    # is detected as padding and removed. This works for now because there is
    # no padding at inference.
    x = pad_remover.remove(x)

  x = tf.expand_dims(x, axis=0)  # Now batch_size=1
  return x 
Example #17
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def loss(self, logits, features):
    """Loss function for Neural Shuffle-Exchange network.

    We use a custom loss function because the default loss function doesn't
    use padding when calculating the loss. We assume that the output string is
    the same length as the input. If you need another type of output, feel
    free to modify this.

    Args:
      logits: Logits from model
      features: Features, not in one-hot format

    Returns:
       tf.Tensor: Loss value
    """

    onehot_labels = tf.one_hot(features["targets"],
                               self._problem_hparams.vocab_size["targets"])
    cost_vector = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=onehot_labels)
    return tf.reduce_mean(cost_vector) 
Example #18
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def pad(tensor, pad_len):
    """Pad tensor on first dimension to pad_len.

    Args:
      tensor: input tensor of rank >= 2
      pad_len: pad length

    Returns:
      tf.Tensor: Padded input tensor.
    """

    assert len(tensor.shape) >= 2  # tensor of shape [batch, length, ...]
    length = tf.shape(tensor)[1]

    padding = [[0, 0], [0, pad_len - length]]
    padding += [[0, 0]] * (len(tensor.shape) - 2)
    return tf.pad(tensor, padding) 
Example #19
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def convert_gradient_to_tensor(x):
  """Identity operation whose gradient is converted to a `Tensor`.

  Currently, the gradient to `tf.concat` is particularly expensive to
  compute if dy is an `IndexedSlices` (a lack of GPU implementation
  forces the gradient operation onto CPU).  This situation occurs when
  the output of the `tf.concat` is eventually passed to `tf.gather`.
  It is sometimes faster to convert the gradient to a `Tensor`, so as
  to get the cheaper gradient for `tf.concat`.  To do this, replace
  `tf.concat(x)` with `convert_gradient_to_tensor(tf.concat(x))`.

  Args:
    x: A `Tensor`.

  Returns:
    The input `Tensor`.
  """
  return x 
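Note that the body alone is just an identity; in the tensor2tensor source this function is additionally wrapped in a Defun decorator whose python_grad_func converts the incoming gradient to a dense Tensor, roughly along these lines (paraphrased, graph mode only):

from tensorflow.python.framework import function
import tensorflow.compat.v1 as tf

@function.Defun(
    python_grad_func=lambda x, dy: tf.convert_to_tensor(dy),
    shape_func=lambda op: [op.inputs[0].get_shape()])
def convert_gradient_to_tensor(x):
  return x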
Example #20
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def shuffle_layer(inputs, shuffle_fn=rol):
  """Shuffles the elements according to bitwise left or right rotation.

  Args:
    inputs: Tensor input from previous layer
    shuffle_fn: Shift function rol or ror

  Returns:
    tf.Tensor: Inputs shifted according to shuffle_fn
  """

  length = tf.shape(inputs)[1]
  n_bits = tf.log(tf.cast(length - 1, tf.float32)) / tf.log(2.0)
  n_bits = tf.cast(n_bits, tf.int32) + 1

  indices = tf.range(0, length)
  rev_indices = shuffle_fn(indices, n_bits)
  return tf.gather(inputs, rev_indices, axis=1) 
Example #21
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def rol(x, n, p=1):
  """Bitwise left rotation.

  Args:
    x: Input tensor
    n: Bit count to represent x
    p: Bit positions to shift

  Returns:
    tf.Tensor: x shifted by p positions in n bits
  """
  a = tf.bitwise.left_shift(x, p)
  b = tf.bitwise.left_shift(1, n) - 1
  c = tf.bitwise.bitwise_and(a, b)
  d = tf.bitwise.right_shift(x, n - p)

  return tf.bitwise.bitwise_or(c, d) 
Example #22
Source File: shuffle_network.py    From tensor2tensor with Apache License 2.0
def ror(x, n, p=1):
  """Bitwise right rotation.

  Args:
    x: Input tensor
    n: Bit count to represent x
    p: Bit positions to shift

  Returns:
    tf.Tensor: x shifted by p positions in n bits
  """

  a = tf.bitwise.right_shift(x, p)
  b = tf.bitwise.left_shift(1, p) - 1
  c = tf.bitwise.bitwise_and(x, b)
  d = tf.bitwise.left_shift(c, n - p)

  return a + d 
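A quick eager check (not from the repo) of the two rotations on a 4-bit value, assuming the rol and ror definitions above are in scope:

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

x = tf.constant(0b0011, dtype=tf.int32)
print(int(rol(x, n=4)))  # 0b0110 == 6
print(int(ror(x, n=4)))  # 0b1001 == 9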
Example #23
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def summarize_features(features, num_shards=1):
  """Generate summaries for features."""
  if not common_layers.should_generate_summaries():
    return

  with tf.name_scope("input_stats"):
    for (k, v) in sorted(six.iteritems(features)):
      if (isinstance(v, tf.Tensor) and (v.get_shape().ndims > 1) and
          (v.dtype != tf.string)):
        tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards)
        tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
        nonpadding = tf.to_float(tf.not_equal(v, 0))
        nonpadding_tokens = tf.reduce_sum(nonpadding)
        tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
        tf.summary.scalar("%s_nonpadding_fraction" % k,
                          tf.reduce_mean(nonpadding)) 
Example #24
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def average_sharded_losses(sharded_losses):
  """Average losses across datashards.

  Args:
    sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss
      can be a single Tensor or a 2-tuple (numerator and denominator).

  Returns:
    losses: dict<str loss_name, Tensor avg_loss>
  """
  losses = {}
  for loss_name in sorted(sharded_losses[0]):
    all_shards = [shard_losses[loss_name] for shard_losses in sharded_losses]
    if isinstance(all_shards[0], tuple):
      sharded_num, sharded_den = zip(*all_shards)
      mean_loss = (
          tf.add_n(sharded_num) / tf.maximum(
              tf.cast(1.0, sharded_den[0].dtype), tf.add_n(sharded_den)))
    else:
      mean_loss = tf.reduce_mean(all_shards)

    losses[loss_name] = mean_loss
  return losses 
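A small eager sketch (illustrative values, not from the repo, assuming average_sharded_losses above is in scope) showing the (numerator, denominator) form, which is combined as sum(numerators) / sum(denominators):

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

shards = [{"training": (tf.constant(2.0), tf.constant(1.0))},
          {"training": (tf.constant(4.0), tf.constant(3.0))}]
print(average_sharded_losses(shards)["training"].numpy())  # (2+4)/(1+3) = 1.5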
Example #25
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def _beam_decode(self,
                   features,
                   decode_length,
                   beam_size,
                   top_beams,
                   alpha,
                   use_tpu=False):
    """Beam search decoding.

    Models should ideally implement a more efficient version of this function.

    Args:
      features: a map of string to `Tensor`
      decode_length: an integer.  How many additional timesteps to decode.
      beam_size: number of beams.
      top_beams: an integer. How many of the beams to return.
      alpha: Float that controls the length penalty. The larger the alpha, the
        stronger the preference for longer translations.
      use_tpu: A bool, whether to do beam decode on TPU.

    Returns:
       samples: an integer `Tensor`. Top samples from the beam search
    """
    return self._beam_decode_slow(features, decode_length, beam_size, top_beams,
                                  alpha, use_tpu) 
Example #26
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0
def body(self, features):
    """Computes the targets' pre-logit activations given transformed inputs.

    Most `T2TModel` subclasses will override this method.

    Args:
      features: dict of str to Tensor, where each Tensor has shape [batch_size,
        ..., hidden_size]. It typically contains keys `inputs` and `targets`.

    Returns:
      output: Tensor of pre-logit activations with shape [batch_size, ...,
              hidden_size].
      losses: Either single loss as a scalar, a list, a Tensor (to be averaged),
              or a dictionary of losses. If losses is a dictionary with the key
              "training", losses["training"] is considered the final training
              loss and output is considered logits; self.top and self.loss will
              be skipped.
    """
    raise NotImplementedError("Abstract Method") 
Example #27
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def check_nonnegative(value):
  """Check that the value is nonnegative."""
  if isinstance(value, tf.Tensor):
    with tf.control_dependencies([tf.assert_greater_equal(value, 0)]):
      value = tf.identity(value)
  elif value < 0:
    raise ValueError("Value must be non-negative.")
  return value 
Example #28
Source File: residual_shuffle_exchange.py    From tensor2tensor with Apache License 2.0
def reverse_part(inputs, hparams, n_bits):
  """Reverse part of Benes block.

  Repeatedly applies interleaved Residual Switch layers and Reverse Shuffle
  layers. One set of weights is used for all Switch layers.

  Args:
    inputs: inputs for reverse part. Should be outputs from forward part.
    hparams: params of the network.
    n_bits: count of repeated layer applications.

  Returns:
    tf.Tensor: output of reverse part.
  """
  reverse_rsu = RSU("reverse_switch", hparams.dropout, hparams.mode)

  def reverse_step(state, _):
    with tf.variable_scope("reverse"):
      new_state = reverse_rsu(state)
      return reverse_shuffle_layer(new_state)

  reverse_outputs = tf.scan(
      reverse_step,
      tf.range(n_bits, n_bits * 2),
      initializer=inputs,
      parallel_iterations=1,
      swap_memory=True)

  return reverse_outputs[-1, :, :, :] 
Example #29
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def weights_prepend_inputs_to_targets(labels):
  """Assign weight 1.0 to only the "targets" portion of the labels.

  Weight 1.0 is assigned to all nonzero labels past the first zero.
  See prepend_mode in common_hparams.py

  Args:
    labels: A Tensor of int32s.

  Returns:
    A Tensor of floats.
  """
  past_first_zero = tf.cumsum(to_float(tf.equal(labels, 0)), axis=1)
  nonzero = to_float(labels)
  return to_float(tf.not_equal(past_first_zero * nonzero, 0)) 
Example #30
Source File: common_layers.py    From tensor2tensor with Apache License 2.0
def cumsum(x, axis=0, exclusive=False):
  """TPU hack for tf.cumsum.

  This is equivalent to tf.cumsum and is faster on TPU as of 04/2018 unless
  the axis dimension is very large.

  Args:
    x: a Tensor
    axis: an integer
    exclusive: a boolean

  Returns:
    Tensor of the same shape as x.
  """
  if not is_xla_compiled():
    return tf.cumsum(x, axis=axis, exclusive=exclusive)
  x_shape = shape_list(x)
  rank = len(x_shape)
  length = x_shape[axis]
  my_range = tf.range(length)
  comparator = tf.less if exclusive else tf.less_equal
  mask = tf.cast(
      comparator(tf.expand_dims(my_range, 1), tf.expand_dims(my_range, 0)),
      x.dtype)
  ret = tf.tensordot(x, mask, axes=[[axis], [0]])
  if axis != rank - 1:
    ret = tf.transpose(
        ret,
        list(range(axis)) + [rank - 1] + list(range(axis, rank - 1)))
  return ret
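A short eager check (not from the repo) that the mask-and-tensordot trick used on the XLA path matches tf.cumsum:

import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

x = tf.constant([[1., 2., 3.], [4., 5., 6.]])
my_range = tf.range(3)
mask = tf.cast(tf.less_equal(tf.expand_dims(my_range, 1),
                             tf.expand_dims(my_range, 0)), x.dtype)
print(tf.tensordot(x, mask, axes=[[1], [0]]).numpy())  # [[ 1.  3.  6.] [ 4.  9. 15.]]
print(tf.cumsum(x, axis=1).numpy())                    # same values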