Python tensorflow.compat.v1.Tensor() Examples
The following are 30 code examples of tensorflow.compat.v1.Tensor().
Example #1
Source File: attention_lm_moe.py From tensor2tensor with Apache License 2.0 | 6 votes |
def expand_batch_coordinates(bc, length_factor):
  """Duplicate elements of bc by length_factor.

  Args:
    bc (tf.Tensor): int32 tensor of shape [1, length, 1]
    length_factor (int):

  Returns:
    tf.Tensor: of shape [1, length*length_factor, 1] where every element has
      been duplicated length_factor times.
  """
  assert bc.get_shape().as_list() == [1, None, 1]
  # bc has shape [1, length, 1]
  bc *= tf.constant([[1] * length_factor])
  # bc has shape [1, length, length_factor]
  bc = tf.reshape(bc, [1, -1, 1])
  # bc has shape [1, length*length_factor, 1]
  return bc
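A minimal usage sketch for the helper above. The placeholder-based setup is an assumption (the static shape [1, None, 1] is required by the assert, so the snippet disables eager execution); the input values are illustrative only.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

bc = tf.placeholder(tf.int32, shape=[1, None, 1])         # batch coordinates
expanded = expand_batch_coordinates(bc, length_factor=3)   # [1, length*3, 1]

with tf.Session() as sess:
  result = sess.run(expanded, feed_dict={bc: [[[0], [1], [2]]]})
  # Each coordinate is repeated 3 times in place:
  # [[[0], [0], [0], [1], [1], [1], [2], [2], [2]]]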
Example #2
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def __init__(self, num_experts, gates):
  """Create a SparseDispatcher.

  Args:
    num_experts: an integer.
    gates: a `Tensor` of shape `[batch_size, num_experts]`.

  Returns:
    a SparseDispatcher
  """
  self._gates = gates
  self._num_experts = num_experts

  where = tf.to_int32(tf.where(tf.transpose(gates) > 0))
  self._expert_index, self._batch_index = tf.unstack(where, num=2, axis=1)
  self._part_sizes_tensor = tf.reduce_sum(tf.to_int32(gates > 0), [0])
  self._nonzero_gates = tf.gather(
      tf.reshape(self._gates, [-1]),
      self._batch_index * num_experts + self._expert_index)
Example #3
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def combine(self, expert_out, multiply_by_gates=True):
  """Sum together the expert output, weighted by the gates.

  The slice corresponding to a particular batch element `b` is computed
  as the sum over all experts `i` of the expert output, weighted by the
  corresponding gate values. If `multiply_by_gates` is set to False, the
  gate values are ignored.

  Args:
    expert_out: a list of `num_experts` `Tensor`s, each with shape
      `[expert_batch_size_i, <extra_output_dims>]`.
    multiply_by_gates: a boolean

  Returns:
    a `Tensor` with shape `[batch_size, <extra_output_dims>]`.
  """
  # see comments on convert_gradient_to_tensor
  stitched = common_layers.convert_gradient_to_tensor(
      tf.concat(expert_out, 0))
  if multiply_by_gates:
    stitched *= tf.expand_dims(self._nonzero_gates, 1)
  combined = tf.unsorted_segment_sum(stitched, self._batch_index,
                                     tf.shape(self._gates)[0])
  return combined
Example #4
Source File: model_tf1.py From machine-learning-for-programming-samples with MIT License | 6 votes |
def compute_logits(self, token_ids: tf.Tensor) -> tf.Tensor:
    """
    Implements a language model, where each output is conditional on the
    current input and inputs processed so far.

    Args:
        token_ids: int32 tensor of shape [B, T], storing integer IDs of tokens.

    Returns:
        tf.float32 tensor of shape [B, T, V], storing the distribution over
        output symbols for each timestep for each batch element.
    """
    # TODO 5# 1) Embed tokens
    # TODO 5# 2) Run RNN on embedded tokens
    # TODO 5# 3) Project RNN outputs onto the vocabulary to obtain logits.
    return rnn_output_logits
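The TODOs above are deliberately left open in the original sample. Below is one hedged way the body could be filled in; the hyperparameter names (self.vocab_size, self.hidden_size) and the GRU/dense choices are assumptions for illustration, not the sample's reference solution.

def compute_logits(self, token_ids: tf.Tensor) -> tf.Tensor:
    # 1) Embed tokens: [B, T] -> [B, T, D]
    embedding = tf.get_variable(
        "token_embedding", shape=[self.vocab_size, self.hidden_size])
    embedded = tf.nn.embedding_lookup(embedding, token_ids)

    # 2) Run an RNN over the embedded tokens: [B, T, D] -> [B, T, H]
    cell = tf.nn.rnn_cell.GRUCell(self.hidden_size)
    rnn_outputs, _ = tf.nn.dynamic_rnn(cell, embedded, dtype=tf.float32)

    # 3) Project RNN outputs onto the vocabulary: [B, T, H] -> [B, T, V]
    rnn_output_logits = tf.layers.dense(
        rnn_outputs, units=self.vocab_size, use_bias=False)
    return rnn_output_logits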
Example #5
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def cv_squared(x):
  """The squared coefficient of variation of a sample.

  Useful as a loss to encourage a positive distribution to be more uniform.
  Epsilons added for numerical stability.
  Returns 0 for an empty Tensor.

  Args:
    x: a `Tensor`.

  Returns:
    a `Scalar`.
  """
  epsilon = 1e-10
  float_size = tf.to_float(tf.size(x)) + epsilon
  mean = tf.reduce_sum(x) / float_size
  variance = tf.reduce_sum(tf.squared_difference(x, mean)) / float_size
  return variance / (tf.square(mean) + epsilon)
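A small hedged check of the behaviour described in the docstring: a uniform expert load gives a value near zero, an unbalanced load a larger one. The numbers in the comments are computed by hand, not taken from the source; evaluate eagerly or inside a tf.Session.

import tensorflow.compat.v1 as tf

uniform = tf.constant([2.0, 2.0, 2.0, 2.0])
skewed = tf.constant([7.0, 1.0, 0.0, 0.0])

loss_uniform = cv_squared(uniform)  # ~0.0   (variance is 0)
loss_skewed = cv_squared(skewed)    # ~2.125 (variance 8.5 / mean^2 4.0)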
Example #6
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def restore(self, x):
  """Add padding back to the given tensor.

  Args:
    x (tf.Tensor): of shape [dim_compressed,...]

  Returns:
    a tensor of shape [dim_origin,...] with dim_origin >= dim_compressed. The
    dim is restored from the original reference tensor.
  """
  with tf.name_scope("pad_reduce/restore"):
    x = tf.scatter_nd(
        indices=self.nonpad_ids,
        updates=x,
        shape=tf.concat([self.dim_origin, tf.shape(x)[1:]], axis=0),
    )
  return x
Example #7
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def remove(self, x):
  """Remove padding from the given tensor.

  Args:
    x (tf.Tensor): of shape [dim_origin,...]

  Returns:
    a tensor of shape [dim_compressed,...] with dim_compressed <= dim_origin
  """
  with tf.name_scope("pad_reduce/remove"):
    x_shape = x.get_shape().as_list()
    x = tf.gather_nd(
        x,
        indices=self.nonpad_ids,
    )
    if not tf.executing_eagerly():
      # This is a hack, but for some reason gather_nd returns a tensor of
      # undefined shape, so the shape is set manually.
      x.set_shape([None] + x_shape[1:])
  return x
Example #8
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def __init__(self, pad_mask):
  """Compute and store the location of the padding.

  Args:
    pad_mask (tf.Tensor): Reference padding tensor of shape
      [batch_size, length] or [dim_origin] (dim_origin=batch_size*length)
      containing non-zero positive values to indicate padding locations.
  """
  self.nonpad_ids = None
  self.dim_origin = None

  with tf.name_scope("pad_reduce/get_ids"):
    pad_mask = tf.reshape(pad_mask, [-1])  # Flatten the batch
    # nonpad_ids contains coordinates of zero rows (as pad_mask is
    # float32, checking zero equality is done with |x| < epsilon, with
    # epsilon=1e-9 as standard; here pad_mask only contains positive values
    # so tf.abs would be redundant).
    self.nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))
    self.dim_origin = tf.shape(pad_mask)[:1]
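Putting the three PadRemover methods above together, a hedged round-trip sketch. It assumes the class these methods belong to is importable as expert_utils.PadRemover; shapes and values are illustrative.

import tensorflow.compat.v1 as tf

# pad_mask is positive at padding positions, 0.0 at real tokens.
pad_mask = tf.constant([[0.0, 0.0, 1.0],
                        [0.0, 1.0, 1.0]])              # [batch=2, length=3]
x = tf.reshape(tf.range(6, dtype=tf.float32), [6, 1])  # flattened [batch*length, depth]

pad_remover = expert_utils.PadRemover(pad_mask)
compressed = pad_remover.remove(x)          # keeps the 3 non-padding rows
restored = pad_remover.restore(compressed)  # zeros re-inserted at padding rows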
Example #9
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def weights_multi_problem(labels, taskid=-1):
  """Assign weight 1.0 to only the "targets" portion of the labels.

  Weight 1.0 is assigned to all labels past the taskid.

  Args:
    labels: A Tensor of int32s.
    taskid: an int32 representing the task id for a problem.

  Returns:
    A Tensor of floats.

  Raises:
    ValueError: The Task ID must be valid.
  """
  taskid = check_nonnegative(taskid)
  past_taskid = tf.cumsum(to_float(tf.equal(labels, taskid)), axis=1)
  # Additionally zero out the task id location
  past_taskid *= to_float(tf.not_equal(labels, taskid))
  non_taskid = to_float(labels)
  return to_float(tf.not_equal(past_taskid * non_taskid, 0))
Example #10
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _normal_distribution_cdf(x, stddev):
  """Evaluates the CDF of the normal distribution.

  Normal distribution with mean 0 and standard deviation stddev,
  evaluated at x=x.

  input and output `Tensor`s have matching shapes.

  Args:
    x: a `Tensor`
    stddev: a `Tensor` with the same shape as `x`.

  Returns:
    a `Tensor` with the same shape as `x`.
  """
  return 0.5 * (1.0 + tf.erf(x / (math.sqrt(2) * stddev + 1e-20)))
Example #11
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def __init__(self, data_parallelism, expert_parallelism, gates):
  """Create a DistributedSparseDispatcher.

  Args:
    data_parallelism: a Parallelism object.
    expert_parallelism: a Parallelism object.
    gates: a list of datashard_parallelism.n `Tensor`s of shapes
      `[batch_size[d], num_experts]`.

  Returns:
    a DistributedSparseDispatcher
  """
  self._gates = gates
  self._dp = data_parallelism
  self._ep = expert_parallelism
  assert len(gates) == self._dp.n
  self._dispatchers = self._dp(SparseDispatcher, self._ep.n, gates)
Example #12
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def dispatch(self, inp):
  """Create one input Tensor for each expert.

  Args:
    inp: a list of length num_datashards `Tensor`s with shapes
      `[batch_size[d], <extra_input_dims>]`.

  Returns:
    a list of `num_experts` `Tensor`s with shapes
      `[num_examples[i], <extra_input_dims>]`.
  """
  dispatched = self._dp(lambda a, b: a.dispatch(b), self._dispatchers, inp)
  ret = self._ep(tf.concat, transpose_list_of_lists(dispatched), 0)
  if ret[0].dtype == tf.float32:
    # see comments on common_layers.convert_gradient_to_tensor
    ret = self._ep(common_layers.convert_gradient_to_tensor, ret)
  return ret
Example #13
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def get_timing_signal(length,
                      min_timescale=1,
                      max_timescale=1e4,
                      num_timescales=16):
  """Create Tensor of sinusoids of different frequencies.

  Args:
    length: Length of the Tensor to create, i.e. Number of steps.
    min_timescale: a float
    max_timescale: a float
    num_timescales: an int

  Returns:
    Tensor of shape (length, 2*num_timescales)
  """
  positions = to_float(tf.range(length))
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
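As a quick illustration of the shape contract (a sketch, assuming the function above is in scope as common_layers.get_timing_signal):

signal = common_layers.get_timing_signal(length=10, num_timescales=16)
# signal has shape (10, 32): sines in the first 16 columns, cosines in the last 16.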
Example #14
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def eval_autoregressive(self, features=None, decode_length=50):
  """Autoregressive eval.

  Quadratic time in decode_length.

  Args:
    features: a map of string to `Tensor`
    decode_length: an integer. How many additional timesteps to decode.

  Returns:
    logits: `Tensor`
    losses: a dictionary: {loss-name (string): floating point `Scalar`}.
        Contains a single key "training".
  """
  results = self._slow_greedy_infer(features, decode_length=decode_length)
  return results["logits"], results["losses"]
Example #15
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def combine(self, x):
  """Return the output from the experts.

  When one example goes to multiple experts, the outputs are summed.

  Args:
    x: a Tensor with shape [batch, num_experts, expert_capacity, depth]

  Returns:
    a `Tensor` with shape `[batch, length, depth]`
  """
  depth = tf.shape(x)[-1]
  x *= tf.expand_dims(self._nonpadding, -1)
  ret = tf.unsorted_segment_sum(
      x, self._flat_indices, num_segments=self._batch * self._length)
  ret = tf.reshape(ret, [self._batch, self._length, depth])
  return ret
Example #16
Source File: attention_lm_moe.py From tensor2tensor with Apache License 2.0 | 6 votes |
def remove_pad(x, pad_remover, mode):
  """Remove padding by concatenating all dimensions into one.

  Args:
    x (tf.Tensor): input of shape [batch_size, length, depth]
    pad_remover (obj): a PadRemover object
    mode (ModeKeys): infer, train or eval. If inference, the padding remover is
      not applied

  Returns:
    tf.Tensor of shape [1, length_nonpad, depth] where
      length_nonpad <= batch_size*length
  """
  # Concatenate all tokens (without padding)
  x = expert_utils.flatten_all_but_last(x)

  # Remove padding for training and eval
  if mode != ModeKeys.PREDICT:
    # This is a hack to allow inference when the <go> token
    # is detected as padding and removed. This works for now because there is
    # no padding at inference.
    x = pad_remover.remove(x)

  x = tf.expand_dims(x, axis=0)  # Now batch_size=1
  return x
Example #17
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 6 votes |
def loss(self, logits, features):
  """Loss function for the Neural Shuffle-Exchange network.

  We use a custom loss function because the default loss does not account for
  padding. We assume the output string is the same length as the input; if you
  need another type of output, feel free to modify this.

  Args:
    logits: Logits from model
    features: Features, not in one-hot format

  Returns:
    tf.Tensor: Loss value
  """
  onehot_labels = tf.one_hot(features["targets"],
                             self._problem_hparams.vocab_size["targets"])
  cost_vector = tf.nn.softmax_cross_entropy_with_logits_v2(
      logits=logits, labels=onehot_labels)
  return tf.reduce_mean(cost_vector)
Example #18
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 6 votes |
def pad(tensor, pad_len):
  """Pad tensor along the length dimension (axis 1) to pad_len.

  Args:
    tensor: input tensor of rank >= 2
    pad_len: pad length

  Returns:
    tf.Tensor: Padded input tensor.
  """
  assert len(tensor.shape) >= 2  # tensor of shape [batch, length, ...]
  length = tf.shape(tensor)[1]
  padding = [[0, 0], [0, pad_len - length]]
  padding += [[0, 0]] * (len(tensor.shape) - 2)
  return tf.pad(tensor, padding)
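A small usage sketch, assuming a rank-3 input (shapes are illustrative):

import tensorflow.compat.v1 as tf

x = tf.zeros([4, 5, 8])      # [batch, length, depth]
padded = pad(x, pad_len=16)  # zero-padded along axis 1 to [4, 16, 8]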
Example #19
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def convert_gradient_to_tensor(x):
  """Identity operation whose gradient is converted to a `Tensor`.

  Currently, the gradient to `tf.concat` is particularly expensive to
  compute if dy is an `IndexedSlices` (a lack of GPU implementation
  forces the gradient operation onto CPU).  This situation occurs when
  the output of the `tf.concat` is eventually passed to `tf.gather`.
  It is sometimes faster to convert the gradient to a `Tensor`, so as
  to get the cheaper gradient for `tf.concat`.  To do this, replace
  `tf.concat(x)` with `convert_gradient_to_tensor(tf.concat(x))`.

  Args:
    x: A `Tensor`.

  Returns:
    The input `Tensor`.
  """
  return x
Example #20
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 6 votes |
def shuffle_layer(inputs, shuffle_fn=rol):
  """Shuffles the elements according to bitwise left or right rotation.

  Args:
    inputs: Tensor input from previous layer
    shuffle_fn: Shift function rol or ror

  Returns:
    tf.Tensor: Inputs shifted according to shuffle_fn
  """
  length = tf.shape(inputs)[1]
  n_bits = tf.log(tf.cast(length - 1, tf.float32)) / tf.log(2.0)
  n_bits = tf.cast(n_bits, tf.int32) + 1

  indices = tf.range(0, length)
  rev_indices = shuffle_fn(indices, n_bits)
  return tf.gather(inputs, rev_indices, axis=1)
Example #21
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 6 votes |
def rol(x, n, p=1):
  """Bitwise left rotation.

  Args:
    x: Input tensor
    n: Bit count to represent x
    p: Bit positions to shift

  Returns:
    tf.Tensor: x shifted by p positions in n bits
  """
  a = tf.bitwise.left_shift(x, p)
  b = tf.bitwise.left_shift(1, n) - 1
  c = tf.bitwise.bitwise_and(a, b)
  d = tf.bitwise.right_shift(x, n - p)
  return tf.bitwise.bitwise_or(c, d)
Example #22
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 6 votes |
def ror(x, n, p=1):
  """Bitwise right rotation.

  Args:
    x: Input tensor
    n: Bit count to represent x
    p: Bit positions to shift

  Returns:
    tf.Tensor: x shifted by p positions in n bits
  """
  a = tf.bitwise.right_shift(x, p)
  b = tf.bitwise.left_shift(1, p) - 1
  c = tf.bitwise.bitwise_and(x, b)
  d = tf.bitwise.left_shift(c, n - p)
  return a + d
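A hedged sanity check of the two rotations above on small integers; the expected values in the comments are computed by hand over 3-bit words.

import tensorflow.compat.v1 as tf

x = tf.constant([1, 2, 4])  # 3-bit values 001, 010, 100
left = rol(x, n=3, p=1)     # 010, 100, 001 -> [2, 4, 1]
right = ror(x, n=3, p=1)    # 100, 001, 010 -> [4, 1, 2]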
Example #23
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def summarize_features(features, num_shards=1):
  """Generate summaries for features."""
  if not common_layers.should_generate_summaries():
    return

  with tf.name_scope("input_stats"):
    for (k, v) in sorted(six.iteritems(features)):
      if (isinstance(v, tf.Tensor) and (v.get_shape().ndims > 1) and
          (v.dtype != tf.string)):
        tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards)
        tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
        nonpadding = tf.to_float(tf.not_equal(v, 0))
        nonpadding_tokens = tf.reduce_sum(nonpadding)
        tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
        tf.summary.scalar("%s_nonpadding_fraction" % k,
                          tf.reduce_mean(nonpadding))
Example #24
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def average_sharded_losses(sharded_losses):
  """Average losses across datashards.

  Args:
    sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss
      can be a single Tensor or a 2-tuple (numerator and denominator).

  Returns:
    losses: dict<str loss_name, Tensor avg_loss>
  """
  losses = {}
  for loss_name in sorted(sharded_losses[0]):
    all_shards = [shard_losses[loss_name] for shard_losses in sharded_losses]
    if isinstance(all_shards[0], tuple):
      sharded_num, sharded_den = zip(*all_shards)
      mean_loss = (
          tf.add_n(sharded_num) / tf.maximum(
              tf.cast(1.0, sharded_den[0].dtype), tf.add_n(sharded_den)))
    else:
      mean_loss = tf.reduce_mean(all_shards)

    losses[loss_name] = mean_loss
  return losses
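A hedged example of the two loss formats the function accepts (plain scalars and numerator/denominator pairs); the loss names and numbers are illustrative only.

import tensorflow.compat.v1 as tf

sharded_losses = [
    {"training": tf.constant(0.4),
     "extra": (tf.constant(6.0), tf.constant(3.0))},  # (numerator, denominator)
    {"training": tf.constant(0.6),
     "extra": (tf.constant(2.0), tf.constant(1.0))},
]
losses = average_sharded_losses(sharded_losses)
# losses["training"] ~ 0.5  (mean of the scalars)
# losses["extra"]    ~ 2.0  ((6 + 2) / (3 + 1))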
Example #25
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _beam_decode(self,
                 features,
                 decode_length,
                 beam_size,
                 top_beams,
                 alpha,
                 use_tpu=False):
  """Beam search decoding.

  Models should ideally implement a more efficient version of this function.

  Args:
    features: a map of string to `Tensor`
    decode_length: an integer. How many additional timesteps to decode.
    beam_size: number of beams.
    top_beams: an integer. How many of the beams to return.
    alpha: Float that controls the length penalty. The larger the alpha, the
      stronger the preference for longer translations.
    use_tpu: A bool, whether to do beam decode on TPU.

  Returns:
    samples: an integer `Tensor`. Top samples from the beam search
  """
  return self._beam_decode_slow(features, decode_length, beam_size,
                                top_beams, alpha, use_tpu)
Example #26
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def body(self, features):
  """Computes the targets' pre-logit activations given transformed inputs.

  Most `T2TModel` subclasses will override this method.

  Args:
    features: dict of str to Tensor, where each Tensor has shape [batch_size,
      ..., hidden_size]. It typically contains keys `inputs` and `targets`.

  Returns:
    output: Tensor of pre-logit activations with shape [batch_size, ...,
            hidden_size].
    losses: Either single loss as a scalar, a list, a Tensor (to be averaged),
            or a dictionary of losses. If losses is a dictionary with the key
            "training", losses["training"] is considered the final training
            loss and output is considered logits; self.top and self.loss will
            be skipped.
  """
  raise NotImplementedError("Abstract Method")
Example #27
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def check_nonnegative(value):
  """Check that the value is nonnegative."""
  if isinstance(value, tf.Tensor):
    with tf.control_dependencies([tf.assert_greater_equal(value, 0)]):
      value = tf.identity(value)
  elif value < 0:
    raise ValueError("Value must be non-negative.")
  return value
Example #28
Source File: residual_shuffle_exchange.py From tensor2tensor with Apache License 2.0 | 5 votes |
def reverse_part(inputs, hparams, n_bits):
  """Reverse part of Benes block.

  Repeatedly applies interleaved Residual Switch layer and Reverse Shuffle
  Layer. One set of weights is used for all Switch layers.

  Args:
    inputs: inputs for reverse part. Should be outputs from forward part.
    hparams: params of the network.
    n_bits: count of repeated layer applications.

  Returns:
    tf.Tensor: output of reverse part.
  """
  reverse_rsu = RSU("reverse_switch", hparams.dropout, hparams.mode)

  def reverse_step(state, _):
    with tf.variable_scope("reverse"):
      new_state = reverse_rsu(state)
      return reverse_shuffle_layer(new_state)

  reverse_outputs = tf.scan(
      reverse_step,
      tf.range(n_bits, n_bits * 2),
      initializer=inputs,
      parallel_iterations=1,
      swap_memory=True)

  return reverse_outputs[-1, :, :, :]
Example #29
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def weights_prepend_inputs_to_targets(labels):
  """Assign weight 1.0 to only the "targets" portion of the labels.

  Weight 1.0 is assigned to all nonzero labels past the first zero.
  See prepend_mode in common_hparams.py

  Args:
    labels: A Tensor of int32s.

  Returns:
    A Tensor of floats.
  """
  past_first_zero = tf.cumsum(to_float(tf.equal(labels, 0)), axis=1)
  nonzero = to_float(labels)
  return to_float(tf.not_equal(past_first_zero * nonzero, 0))
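A hedged worked example of the weighting rule (labels and expected output computed by hand, assuming the function is in scope as common_layers.weights_prepend_inputs_to_targets):

import tensorflow.compat.v1 as tf

labels = tf.constant([[3, 5, 0, 7, 2, 0]])
weights = common_layers.weights_prepend_inputs_to_targets(labels)
# weights ~ [[0., 0., 0., 1., 1., 0.]]:
# only nonzero labels after the first 0 receive weight 1.0.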
Example #30
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def cumsum(x, axis=0, exclusive=False):
  """TPU hack for tf.cumsum.

  This is equivalent to tf.cumsum and is faster on TPU as of 04/2018 unless
  the axis dimension is very large.

  Args:
    x: a Tensor
    axis: an integer
    exclusive: a boolean

  Returns:
    Tensor of the same shape as x.
  """
  if not is_xla_compiled():
    return tf.cumsum(x, axis=axis, exclusive=exclusive)
  x_shape = shape_list(x)
  rank = len(x_shape)
  length = x_shape[axis]
  my_range = tf.range(length)
  comparator = tf.less if exclusive else tf.less_equal
  mask = tf.cast(
      comparator(tf.expand_dims(my_range, 1), tf.expand_dims(my_range, 0)),
      x.dtype)
  ret = tf.tensordot(x, mask, axes=[[axis], [0]])
  if axis != rank - 1:
    ret = tf.transpose(
        ret, list(range(axis)) + [rank - 1] + list(range(axis, rank - 1)))
  return ret
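A hedged usage sketch: off TPU (no XLA compilation) the function simply delegates to tf.cumsum, so the results below match the standard op; expected values are computed by hand.

import tensorflow.compat.v1 as tf

x = tf.constant([[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0]])
a = common_layers.cumsum(x, axis=1)                  # [[1, 3, 6], [4, 9, 15]]
b = common_layers.cumsum(x, axis=1, exclusive=True)  # [[0, 1, 3], [0, 4, 9]]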