Python tensorflow.python.framework.ops.convert_to_tensor() Examples
The following are 30 code examples of tensorflow.python.framework.ops.convert_to_tensor(), collected from open-source projects. The source file, project, and license are noted above each example.
You may also want to check out all available functions and classes of the module tensorflow.python.framework.ops.
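Before working through the examples, here is a minimal sketch of what ops.convert_to_tensor() accepts and returns: Python lists, NumPy arrays, and scalars are converted to Tensor objects, while an existing Tensor is passed through unchanged. The tensor names and values below are purely illustrative.

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

# Python lists, NumPy arrays, and scalars are all accepted.
t1 = ops.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]], name="from_list")
t2 = ops.convert_to_tensor(np.arange(6, dtype=np.int64), name="from_numpy")
t3 = ops.convert_to_tensor(7.0, dtype=tf.float32, name="from_scalar")

# An existing Tensor is returned as-is (provided any requested dtype matches).
t4 = ops.convert_to_tensor(t3)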
Example #1
Source File: data_flow_ops.py From lambda-packs with MIT License | 6 votes |
def set_global_step(self, new_global_step, name=None):
  """Sets the global time step of the accumulator.

  The operation logs a warning if we attempt to set to a time step that is
  lower than the accumulator's own time step.

  Args:
    new_global_step: Value of new time step. Can be a variable or a constant.
    name: Optional name for the operation.

  Returns:
    Operation that sets the accumulator's time step.
  """
  return gen_data_flow_ops.accumulator_set_global_step(
      self._accumulator_ref,
      math_ops.to_int64(ops.convert_to_tensor(new_global_step)),
      name=name)
Example #2
Source File: array_ops.py From lambda-packs with MIT License | 6 votes |
def size_internal(input, name=None, optimize=True, out_type=dtypes.int32):
  # pylint: disable=redefined-builtin,protected-access
  """Returns the size of a tensor.

  Args:
    input: A `Tensor` or `SparseTensor`.
    name: A name for the operation (optional).
    optimize: if true, encode the size as a constant when possible.
    out_type: (Optional) The specified output type of the operation
      (`int32` or `int64`). Defaults to tf.int32.

  Returns:
    A `Tensor` of type `out_type`.
  """
  with ops.name_scope(name, "Size", [input]) as name:
    if isinstance(
        input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
      return gen_math_ops._prod(
          gen_math_ops.cast(input.dense_shape, out_type), 0, name=name)
    else:
      input_tensor = ops.convert_to_tensor(input)
      input_shape = input_tensor.get_shape()
      if optimize and input_shape.is_fully_defined():
        return constant(input_shape.num_elements(), out_type, name=name)
      return gen_array_ops.size(input, name=name, out_type=out_type)
Example #3
Source File: layers.py From tensornets with MIT License | 6 votes |
def _lower_bound(inputs, bound, name=None):
  """Same as tf.maximum, but with helpful gradient for inputs < bound.

  The gradient is overwritten so that it is passed through if the input is not
  hitting the bound. If it is, only gradients that push `inputs` higher than
  the bound are passed through. No gradients are passed through to the bound.

  Args:
    inputs: input tensor
    bound: lower bound for the input tensor
    name: name for this op

  Returns:
    tf.maximum(inputs, bound)
  """
  with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope:
    inputs = ops.convert_to_tensor(inputs, name='inputs')
    bound = ops.convert_to_tensor(bound, name='bound')
    with ops.get_default_graph().gradient_override_map(
        {'Maximum': 'GDNLowerBound'}):
      return math_ops.maximum(inputs, bound, name=scope)
Example #4
Source File: layers.py From tensornets with MIT License | 6 votes |
def flatten(inputs, outputs_collections=None, scope=None):
  """Flattens the input while maintaining the batch_size.

  Assumes that the first dimension represents the batch.

  Args:
    inputs: A tensor of size [batch_size, ...].
    outputs_collections: Collection to add the outputs.
    scope: Optional scope for name_scope.

  Returns:
    A flattened tensor with shape [batch_size, k].

  Raises:
    ValueError: If inputs rank is unknown or less than 2.
  """
  with ops.name_scope(scope, 'Flatten', [inputs]) as sc:
    inputs = ops.convert_to_tensor(inputs)
    outputs = core_layers.flatten(inputs)
    return utils.collect_named_outputs(outputs_collections, sc, outputs)
Example #5
Source File: array_ops.py From lambda-packs with MIT License | 6 votes |
def rank_internal(input, name=None, optimize=True):
  # pylint: disable=redefined-builtin
  """Returns the rank of a tensor.

  Args:
    input: A `Tensor` or `SparseTensor`.
    name: A name for the operation (optional).
    optimize: if true, encode the rank as a constant when possible.

  Returns:
    A `Tensor` of type `int32`.
  """
  with ops.name_scope(name, "Rank", [input]) as name:
    if isinstance(
        input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
      return gen_array_ops.size(input.dense_shape, name=name)
    else:
      input_tensor = ops.convert_to_tensor(input)
      input_shape = input_tensor.get_shape()
      if optimize and input_shape.ndims is not None:
        return constant(input_shape.ndims, dtypes.int32, name=name)
      return gen_array_ops.rank(input, name=name)
Example #6
Source File: layers.py From tensornets with MIT License | 6 votes |
def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
  """Converts a dense tensor into a sparse tensor.

  An example use would be to convert dense labels to sparse ones
  so that they can be fed to the ctc_loss.

  Args:
    tensor: An `int` `Tensor` to be converted to a `Sparse`.
    eos_token: An integer. It is part of the target label that signifies the
      end of a sentence.
    outputs_collections: Collection to add the outputs.
    scope: Optional scope for name_scope.
  """
  with variable_scope.variable_scope(scope, 'dense_to_sparse', [tensor]) as sc:
    tensor = ops.convert_to_tensor(tensor)
    indices = array_ops.where(
        math_ops.not_equal(tensor, constant_op.constant(eos_token,
                                                        tensor.dtype)))
    values = array_ops.gather_nd(tensor, indices)
    shape = array_ops.shape(tensor, out_type=dtypes.int64)
    outputs = sparse_tensor.SparseTensor(indices, values, shape)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
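As a rough illustration of the helper above, a padded label matrix can be converted to a SparseTensor before being fed to ctc_loss. This is a minimal sketch assuming TF 1.x graph mode and that dense_to_sparse from the example above is in scope; the label values are made up.

import tensorflow as tf

# Two label sequences padded with 0 (the eos_token used above).
dense_labels = tf.constant([[3, 1, 4, 0, 0],
                            [2, 7, 0, 0, 0]], dtype=tf.int32)
sparse_labels = dense_to_sparse(dense_labels, eos_token=0)

with tf.Session() as sess:
    result = sess.run(sparse_labels)
    # result.values      -> [3 1 4 2 7]
    # result.indices     -> (row, col) position of every entry != eos_token
    # result.dense_shape -> [2 5]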
Example #7
Source File: tf_image.py From seglink with GNU General Public License v3.0 | 6 votes |
def random_flip_left_right(image, bboxes, seed=None):
    """Random flip left-right of an image and its bounding boxes.
    """
    def flip_bboxes(bboxes):
        """Flip bounding boxes coordinates.
        """
        bboxes = tf.stack([bboxes[:, 0], 1 - bboxes[:, 3],
                           bboxes[:, 2], 1 - bboxes[:, 1]], axis=-1)
        return bboxes

    # Random flip. Tensorflow implementation.
    with tf.name_scope('random_flip_left_right'):
        image = ops.convert_to_tensor(image, name='image')
        _Check3DImage(image, require_static=False)
        uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
        mirror_cond = math_ops.less(uniform_random, .5)

        # Flip image.
        result = control_flow_ops.cond(mirror_cond,
                                       lambda: array_ops.reverse_v2(image, [1]),
                                       lambda: image)

        # Flip bboxes.
        bboxes = control_flow_ops.cond(mirror_cond,
                                       lambda: flip_bboxes(bboxes),
                                       lambda: bboxes)
        return fix_image_flip_shape(image, result), bboxes
Example #8
Source File: array_ops.py From lambda-packs with MIT License | 6 votes |
def shape_internal(input, name=None, optimize=True, out_type=dtypes.int32):
  # pylint: disable=redefined-builtin
  """Returns the shape of a tensor.

  Args:
    input: A `Tensor` or `SparseTensor`.
    name: A name for the operation (optional).
    optimize: if true, encode the shape as a constant when possible.
    out_type: (Optional) The specified output type of the operation
      (`int32` or `int64`). Defaults to tf.int32.

  Returns:
    A `Tensor` of type `out_type`.
  """
  with ops.name_scope(name, "Shape", [input]) as name:
    if isinstance(
        input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
      return gen_math_ops.cast(input.dense_shape, out_type)
    else:
      input_tensor = ops.convert_to_tensor(input)
      input_shape = input_tensor.get_shape()
      if optimize and input_shape.is_fully_defined():
        return constant(input_shape.as_list(), out_type, name=name)
      return gen_array_ops.shape(input, name=name, out_type=out_type)
Example #9
Source File: data_flow_ops.py From lambda-packs with MIT License | 6 votes |
def apply_grad(self, grad, local_step=0, name=None):
  """Attempts to apply a gradient to the accumulator.

  The attempt is silently dropped if the gradient is stale, i.e., local_step
  is less than the accumulator's global time step.

  Args:
    grad: The gradient tensor to be applied.
    local_step: Time step at which the gradient was computed.
    name: Optional name for the operation.

  Returns:
    The operation that (conditionally) applies a gradient to the accumulator.

  Raises:
    ValueError: If grad is of the wrong shape
  """
  grad = ops.convert_to_tensor(grad, self._dtype)
  grad.get_shape().assert_is_compatible_with(self._shape)
  local_step = math_ops.to_int64(ops.convert_to_tensor(local_step))
  return gen_data_flow_ops.accumulator_apply_gradient(
      self._accumulator_ref, local_step=local_step, gradient=grad, name=name)
Example #10
Source File: ops.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes |
def binary_cross_entropy(preds, targets, name=None):
    """Computes binary cross entropy given `preds`.

    For brevity, let `x = preds`, `z = targets`. The logistic loss is

        loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))

    Args:
        preds: A `Tensor` of type `float32` or `float64`.
        targets: A `Tensor` of the same type and shape as `preds`.
    """
    eps = 1e-12
    with ops.op_scope([preds, targets], name, "bce_loss") as name:
        preds = ops.convert_to_tensor(preds, name="preds")
        targets = ops.convert_to_tensor(targets, name="targets")
        return tf.reduce_mean(-(targets * tf.log(preds + eps) +
                                (1. - targets) * tf.log(1. - preds + eps)))
Example #11
Source File: shapes.py From Counterfactual-StoryRW with MIT License | 6 votes |
def shape_list(x):
    """Returns **static** shape of the input Tensor whenever possible.

    Args:
        x: A Tensor.

    Returns:
        - If the rank of :attr:`x` is unknown, returns the dynamic shape: \
        `tf.shape(x)`
        - Otherwise, returns a list of dims, each of which is either an `int` \
        whenever it can be statically determined, or a scalar Tensor otherwise.
    """
    x = tf.convert_to_tensor(x)

    # If unknown rank, return dynamic shape
    if x.get_shape().dims is None:
        return tf.shape(x)

    static = x.get_shape().as_list()
    shape = tf.shape(x)

    ret = []
    for i, dim in enumerate(static):
        if dim is None:
            dim = shape[i]
        ret.append(dim)
    return ret
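A quick sketch of how shape_list behaves on a tensor whose batch dimension is only known at run time (TF 1.x placeholders assumed, and shape_list from the example above in scope): statically known dimensions come back as plain Python ints, unknown ones as scalar Tensors, so the result can be reused directly in reshape operations.

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 128, 64])
dims = shape_list(x)
# dims[0] is a scalar Tensor (batch size unknown until run time),
# dims[1] and dims[2] are the Python ints 128 and 64.
flat = tf.reshape(x, [dims[0], dims[1] * dims[2]])  # static shape [None, 8192]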
Example #12
Source File: tf_helpers.py From Counterfactual-StoryRW with MIT License | 6 votes |
def __init__(self, sample_fn, sample_shape, sample_dtype,
             start_inputs, end_fn, next_inputs_fn=None):
    """Initializer.

    Args:
        sample_fn: A callable that takes `outputs` and emits tensor
            `sample_ids`.
        sample_shape: Either a list of integers, or a 1-D Tensor of type
            `int32`, the shape of each sample in the batch returned by
            `sample_fn`.
        sample_dtype: the dtype of the sample returned by `sample_fn`.
        start_inputs: The initial batch of inputs.
        end_fn: A callable that takes `sample_ids` and emits a `bool` vector
            shaped `[batch_size]` indicating whether each sample is an end
            token.
        next_inputs_fn: (Optional) A callable that takes `sample_ids` and
            returns the next batch of inputs. If not provided, `sample_ids`
            is used as the next batch of inputs.
    """
    self._sample_fn = sample_fn
    self._end_fn = end_fn
    self._sample_shape = tensor_shape.TensorShape(sample_shape)
    self._sample_dtype = sample_dtype
    self._next_inputs_fn = next_inputs_fn
    self._batch_size = array_ops.shape(start_inputs)[0]
    self._start_inputs = ops.convert_to_tensor(
        start_inputs, name="start_inputs")
Example #13
Source File: beam_search_decoder_from_tensorflow.py From tensorflow_end2end_speech_recognition with MIT License | 6 votes |
def _tile_batch(t, multiplier):
    """Core single-tensor implementation of tile_batch."""
    t = ops.convert_to_tensor(t, name="t")
    shape_t = tf.shape(t)
    if t.shape.ndims is None or t.shape.ndims < 1:
        raise ValueError("t must have statically known rank")
    tiling = [1] * (t.shape.ndims + 1)
    tiling[1] = multiplier
    tiled_static_batch_size = (
        t.shape[0].value * multiplier
        if t.shape[0].value is not None else None)
    tiled = tf.tile(tf.expand_dims(t, 1), tiling)
    tiled = tf.reshape(
        tiled, tf.concat(([shape_t[0] * multiplier], shape_t[1:]), 0))
    tiled.set_shape(
        tensor_shape.TensorShape(
            [tiled_static_batch_size]).concatenate(t.shape[1:]))
    return tiled
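For context, tile_batch is used in beam search decoding to repeat every batch entry beam_width times along the batch axis. A minimal sketch of the effect of the helper above, assuming TF 1.x graph mode and _tile_batch from the example in scope; the values are illustrative.

import tensorflow as tf

t = tf.constant([[1, 2],
                 [3, 4]])             # shape [2, 2], batch size 2
tiled = _tile_batch(t, multiplier=3)  # shape [6, 2]

with tf.Session() as sess:
    print(sess.run(tiled))
    # Each batch row is repeated 3 times, in order:
    # [[1 2] [1 2] [1 2] [3 4] [3 4] [3 4]]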
Example #14
Source File: nn_ops.py From lambda-packs with MIT License | 6 votes |
def bias_add_v1(value, bias, name=None):
  """Adds `bias` to `value`.

  This is a deprecated version of bias_add and will soon be removed.

  This is (mostly) a special case of `tf.add` where `bias` is restricted to
  1-D. Broadcasting is supported, so `value` may have any number of
  dimensions. Unlike `tf.add`, the type of `bias` is allowed to differ from
  `value` in the case where both types are quantized.

  Args:
    value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`,
      `int16`, `int8`, `complex64`, or `complex128`.
    bias: A 1-D `Tensor` with size matching the last dimension of `value`.
      Must be the same type as `value` unless `value` is a quantized type,
      in which case a different quantized type may be used.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as `value`.
  """
  with ops.name_scope(name, "BiasAddV1", [value, bias]) as name:
    value = ops.convert_to_tensor(value, name="input")
    bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias")
    return gen_nn_ops._bias_add_v1(value, bias, name=name)
Example #15
Source File: nn_ops.py From lambda-packs with MIT License | 6 votes |
def crelu(features, name=None):
  """Computes Concatenated ReLU.

  Concatenates a ReLU which selects only the positive part of the activation
  with a ReLU which selects only the *negative* part of the activation.
  Note that as a result this non-linearity doubles the depth of the
  activations.
  Source: [Understanding and Improving Convolutional Neural Networks via
  Concatenated Rectified Linear Units. W. Shang, et
  al.](https://arxiv.org/abs/1603.05201)

  Args:
    features: A `Tensor` with type `float`, `double`, `int32`, `int64`,
      `uint8`, `int16`, or `int8`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as `features`.
  """
  with ops.name_scope(name, "CRelu", [features]) as name:
    features = ops.convert_to_tensor(features, name="features")
    c = array_ops.concat([features, -features], -1, name=name)
    return gen_nn_ops.relu(c)
Example #16
Source File: ops.py From text-to-image with MIT License | 6 votes |
def binary_cross_entropy(preds, targets, name=None):
    """Computes binary cross entropy given `preds`.

    For brevity, let `x = preds`, `z = targets`. The logistic loss is

        loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))

    Args:
        preds: A `Tensor` of type `float32` or `float64`.
        targets: A `Tensor` of the same type and shape as `preds`.
    """
    eps = 1e-12
    with ops.op_scope([preds, targets], name, "bce_loss") as name:
        preds = ops.convert_to_tensor(preds, name="preds")
        targets = ops.convert_to_tensor(targets, name="targets")
        return tf.reduce_mean(-(targets * tf.log(preds + eps) +
                                (1. - targets) * tf.log(1. - preds + eps)))
Example #17
Source File: nn_ops.py From lambda-packs with MIT License | 6 votes |
def xw_plus_b(x, weights, biases, name=None):  # pylint: disable=invalid-name
  """Computes matmul(x, weights) + biases.

  Args:
    x: a 2D tensor.  Dimensions typically: batch, in_units
    weights: a 2D tensor.  Dimensions typically: in_units, out_units
    biases: a 1D tensor.  Dimensions: out_units
    name: A name for the operation (optional).  If not specified
      "xw_plus_b" is used.

  Returns:
    A 2-D Tensor computing matmul(x, weights) + biases.
    Dimensions typically: batch, out_units.
  """
  with ops.name_scope(name, "xw_plus_b", [x, weights, biases]) as name:
    x = ops.convert_to_tensor(x, name="x")
    weights = ops.convert_to_tensor(weights, name="weights")
    biases = ops.convert_to_tensor(biases, name="biases")
    mm = math_ops.matmul(x, weights)
    return bias_add(mm, biases, name=name)
Example #18
Source File: nn_ops.py From lambda-packs with MIT License | 6 votes |
def xw_plus_b_v1(x, weights, biases, name=None):  # pylint: disable=invalid-name
  """Computes matmul(x, weights) + biases.

  This is a deprecated version of xw_plus_b that will soon be removed.

  Args:
    x: a 2D tensor.  Dimensions typically: batch, in_units
    weights: a 2D tensor.  Dimensions typically: in_units, out_units
    biases: a 1D tensor.  Dimensions: out_units
    name: A name for the operation (optional).  If not specified
      "xw_plus_b_v1" is used.

  Returns:
    A 2-D Tensor computing matmul(x, weights) + biases.
    Dimensions typically: batch, out_units.
  """
  with ops.name_scope(name, "xw_plus_b_v1", [x, weights, biases]) as name:
    x = ops.convert_to_tensor(x, name="x")
    weights = ops.convert_to_tensor(weights, name="weights")
    biases = ops.convert_to_tensor(biases, name="biases")
    mm = math_ops.matmul(x, weights)
    return bias_add_v1(mm, biases, name=name)
Example #19
Source File: common_layers.py From fine-lm with MIT License | 6 votes |
def shape_list(x):
  """Return list of dims, statically where possible."""
  x = tf.convert_to_tensor(x)

  # If unknown rank, return dynamic shape
  if x.get_shape().dims is None:
    return tf.shape(x)

  static = x.get_shape().as_list()
  shape = tf.shape(x)

  ret = []
  for i in range(len(static)):
    dim = static[i]
    if dim is None:
      dim = shape[i]
    ret.append(dim)
  return ret
Example #20
Source File: distributed_shampoo.py From lingvo with Apache License 2.0 | 5 votes |
def _prepare(self):
  learning_rate = self._call_if_callable(self._learning_rate)
  self._learning_rate_tensor = ops.convert_to_tensor(
      learning_rate, name="learning_rate")
  momentum = self._call_if_callable(self._momentum)
  self._momentum_tensor = ops.convert_to_tensor(momentum, name="momentum")
Example #21
Source File: nn_ops.py From lambda-packs with MIT License | 5 votes |
def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
  """Performs the max pooling on the input.

  Args:
    value: A 4-D `Tensor` with shape `[batch, height, width, channels]` and
      type `tf.float32`.
    ksize: A list of ints that has length >= 4.  The size of the window for
      each dimension of the input tensor.
    strides: A list of ints that has length >= 4.  The stride of the sliding
      window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the @{tf.nn.convolution$comment here}
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    name: Optional name for the operation.

  Returns:
    A `Tensor` with type `tf.float32`.  The max pooled output tensor.
  """
  with ops.name_scope(name, "MaxPool", [value]) as name:
    value = ops.convert_to_tensor(value, name="input")
    return gen_nn_ops._max_pool(value,
                                ksize=ksize,
                                strides=strides,
                                padding=padding,
                                data_format=data_format,
                                name=name)
Example #22
Source File: nn_ops.py From lambda-packs with MIT License | 5 votes |
def bias_add(value, bias, data_format=None, name=None):
  """Adds `bias` to `value`.

  This is (mostly) a special case of `tf.add` where `bias` is restricted to
  1-D. Broadcasting is supported, so `value` may have any number of
  dimensions. Unlike `tf.add`, the type of `bias` is allowed to differ from
  `value` in the case where both types are quantized.

  Args:
    value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`,
      `int16`, `int8`, `complex64`, or `complex128`.
    bias: A 1-D `Tensor` with size matching the last dimension of `value`.
      Must be the same type as `value` unless `value` is a quantized type,
      in which case a different quantized type may be used.
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as `value`.
  """
  with ops.name_scope(name, "BiasAdd", [value, bias]) as name:
    value = ops.convert_to_tensor(value, name="input")
    bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias")
    return gen_nn_ops._bias_add(value, bias, data_format=data_format,
                                name=name)  # pylint: disable=protected-access
Example #23
Source File: nn_ops.py From lambda-packs with MIT License | 5 votes |
def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
  """Performs the average pooling on the input.

  Each entry in `output` is the mean of the corresponding size `ksize`
  window in `value`.

  Args:
    value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type
      `float32`, `float64`, `qint8`, `quint8`, or `qint32`.
    ksize: A list of ints that has length >= 4.  The size of the window for
      each dimension of the input tensor.
    strides: A list of ints that has length >= 4.  The stride of the sliding
      window for each dimension of the input tensor.
    padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
      See the @{tf.nn.convolution$comment here}
    data_format: A string. 'NHWC' and 'NCHW' are supported.
    name: Optional name for the operation.

  Returns:
    A `Tensor` with the same type as `value`.  The average pooled output
    tensor.
  """
  with ops.name_scope(name, "AvgPool", [value]) as name:
    value = ops.convert_to_tensor(value, name="input")
    return gen_nn_ops._avg_pool(value,
                                ksize=ksize,
                                strides=strides,
                                padding=padding,
                                data_format=data_format,
                                name=name)
Example #24
Source File: ResidualGAN.py From Residual_Image_Learning_GAN with MIT License | 5 votes |
def __init__(self, batch_size, max_iters, model_path, data_ob, sample_path,
             log_dir, learning_rate, l1_w, per_w, recon_w):

    self.batch_size = batch_size
    self.max_iters = max_iters
    self.pixel_model_path = model_path
    self.data_ob = data_ob
    self.sample_path = sample_path
    self.log_dir = log_dir
    self.learning_rate = learning_rate
    self.log_vars = []
    self.channel = data_ob.channel
    self.shape = data_ob.shape
    self.class_nums = 2
    self.output_size = data_ob.image_size
    self.l1_w = l1_w
    self.per_w = per_w
    self.recon_w = recon_w

    self.dom_1_images = tf.placeholder(
        tf.float32,
        [batch_size, self.output_size, self.output_size, self.channel])
    self.dom_2_images = tf.placeholder(
        tf.float32,
        [batch_size, self.output_size, self.output_size, self.channel])

    self.dataset = tf.data.Dataset.from_tensor_slices(
        (convert_to_tensor(self.data_ob.dom_1_train_data_list, dtype=tf.string),
         convert_to_tensor(self.data_ob.dom_2_train_data_list, dtype=tf.string)))
    self.dataset = self.dataset.shuffle(
        buffer_size=len(self.data_ob.dom_1_train_data_list))
    self.dataset = self.dataset.map(
        lambda filename1, filename2: tuple(
            tf.py_func(self._read_by_function, [filename1, filename2],
                       [tf.float32, tf.float32])),
        num_parallel_calls=32)
    self.dataset = self.dataset.repeat(50000)
    self.dataset = self.dataset.apply(
        tf.contrib.data.batch_and_drop_remainder(batch_size))

    self.iterator = tf.data.Iterator.from_structure(
        self.dataset.output_types, self.dataset.output_shapes)
    self.next_images1, self.next_images2 = self.iterator.get_next()
    self.train_init_op = self.iterator.make_initializer(self.dataset)

    self.domain_label = tf.placeholder(tf.int32, [batch_size])
    self.lr_decay = tf.placeholder(tf.float32, None, name='lr_decay')
Example #25
Source File: amsgrad.py From HyperGAN with MIT License | 5 votes |
def _prepare(self):
    self._lr_t = ops.convert_to_tensor(self._lr)
    self._beta1_t = ops.convert_to_tensor(self._beta1)
    self._beta2_t = ops.convert_to_tensor(self._beta2)
    self._epsilon_t = ops.convert_to_tensor(self._epsilon)
Example #26
Source File: egdd.py From lingvo with Apache License 2.0 | 5 votes |
def _prepare(self):
  learning_rate = self._call_if_callable(self._learning_rate)
  self._learning_rate_tensor = ops.convert_to_tensor(
      learning_rate, name="learning_rate")
  momentum = self._call_if_callable(self._momentum)
  self._momentum_tensor = ops.convert_to_tensor(momentum, name="momentum")
Example #27
Source File: egdd.py From lingvo with Apache License 2.0 | 5 votes |
def _create_slots(self, var_list):
  for v in var_list:
    self._zeros_slot(v, "momentum", self._name)
    self._zeros_slot(v, "gbar", self._name)
    g_tensor = ops.convert_to_tensor(v)
    gain_init = self._initial_gain * array_ops.ones_like(g_tensor)
    _ = self._get_or_make_slot(v, self._initial_scale * array_ops.ones((1)),
                               "lr_scale", self._name)
    _ = self._get_or_make_slot(v, gain_init, "gain", self._name)
    _ = self._get_or_make_slot(v, array_ops.zeros((1)), "counter", self._name)
Example #28
Source File: adamirror.py From HyperGAN with MIT License | 5 votes |
def _prepare(self):
    self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
    self._beta1_t = ops.convert_to_tensor(self._beta1, name="beta1")
    self._beta2_t = ops.convert_to_tensor(self._beta2, name="beta2")
Example #29
Source File: AdaBound.py From HyperGAN with MIT License | 5 votes |
def _prepare(self):
    self._lr_t = ops.convert_to_tensor(self._lr)
    self._base_lr_t = ops.convert_to_tensor(self._lr)
    self._beta1_t = ops.convert_to_tensor(self._beta1)
    self._beta2_t = ops.convert_to_tensor(self._beta2)
    self._epsilon_t = ops.convert_to_tensor(self._epsilon)
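The _prepare examples above (Examples #20, #25, #26, #28, #29) all follow the same custom-optimizer pattern: hyperparameters are converted to tensors once per minimize call, and each _apply_dense call then casts them to the variable's dtype. Below is a minimal sketch of how such cached tensors are typically consumed; the method names follow the tf.train.Optimizer hook interface, and the update rule is plain SGD chosen purely for illustration.

from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer, training_ops


class PlainSGD(optimizer.Optimizer):
    def __init__(self, learning_rate=0.01, use_locking=False, name="PlainSGD"):
        super(PlainSGD, self).__init__(use_locking, name)
        self._lr = learning_rate

    def _prepare(self):
        # Same pattern as the examples above: convert once, reuse per variable.
        self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")

    def _apply_dense(self, grad, var):
        # Cast the cached hyperparameter tensor to the variable's dtype.
        lr = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        return training_ops.apply_gradient_descent(
            var, lr, grad, use_locking=self._use_locking).op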
Example #30
Source File: common_layers.py From fine-lm with MIT License | 5 votes |
def padded_cross_entropy_factored(factored_logits,
                                  labels,
                                  label_smoothing,
                                  weights_fn=weights_nonzero,
                                  reduce_sum=True):
  """Memory-efficient computation of smoothing cross-entropy.

  Avoids realizing the entire logits matrix at once.

  Args:
    factored_logits: a `FactoredTensor` representing a Tensor
      with shape `[batch, timesteps, vocab_size]`.
    labels: an integer `Tensor` with shape `[batch, timesteps]`.
    label_smoothing: a floating point `Scalar`.
    weights_fn: A function from labels to weights.
    reduce_sum: a Boolean, whether to sum at the end or not.

  Returns:
    loss_numerator: a `Scalar`.  Sum of losses.
    loss_denominator: a `Scalar`.  The number of non-padding target tokens.
  """
  a = factored_logits.a
  b = factored_logits.b
  confidence = 1.0 - label_smoothing
  with tf.name_scope("padded_cross_entropy_factored", values=[a, b, labels]):
    labels_flat = tf.reshape(labels, [-1])
    a_flat = tf.reshape(a, [-1, shape_list(b)[1]])
    xent = smoothing_cross_entropy_factored(a_flat, b, labels_flat,
                                            tf.convert_to_tensor(confidence))
    xent = tf.reshape(xent, shape_list(labels))
    weights = weights_fn(labels)
    if not reduce_sum:
      return xent * weights, weights
    return tf.reduce_sum(xent * weights), tf.reduce_sum(weights)