Python tensorflow.python.ops.math_ops.reduce_sum() Examples
The following are 30 code examples of tensorflow.python.ops.math_ops.reduce_sum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.math_ops, or try the search function.
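As a quick orientation before the examples, the sketch below shows how reduce_sum behaves on a small tensor. It uses the public tf.reduce_sum alias rather than the internal math_ops module, which is an assumption about how you would call it in your own code.

import tensorflow as tf

x = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])

tf.reduce_sum(x)                         # 21.0, sum over all elements
tf.reduce_sum(x, axis=0)                 # [5., 7., 9.], sum down the rows
tf.reduce_sum(x, axis=1)                 # [6., 15.], sum across the columns
tf.reduce_sum(x, axis=1, keepdims=True)  # [[6.], [15.]], reduced axis kept with length 1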
Example #1
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _RealDivGrad(op, grad):
  """RealDiv op gradient."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(
      math_ops.reduce_sum(math_ops.realdiv(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(
                  grad * math_ops.realdiv(math_ops.realdiv(-x, y), y), ry), sy))
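Several gradient functions in this listing share the same pattern: compute the reduction axes for each broadcast input, reduce_sum the incoming gradient over those axes, then reshape back to the input shape. The sketch below reproduces that pattern with the public tf.raw_ops.BroadcastGradientArgs wrapper instead of the private gen_array_ops helper; treat it as an illustrative assumption, not the exact internal call.

import tensorflow as tf

sx = tf.constant([4, 1])   # shape of x (broadcast along axis 1)
sy = tf.constant([4, 3])   # shape of y
rx, ry = tf.raw_ops.BroadcastGradientArgs(s0=sx, s1=sy)
# rx == [1]: x's gradient is summed over the axis it was broadcast along.
# ry == []:  y was not broadcast, so nothing is reduced.

grad = tf.ones([4, 3])     # upstream gradient, in the broadcast shape
gx = tf.reshape(tf.reduce_sum(grad, rx), sx)   # shape [4, 1], each entry 3.0
gy = tf.reshape(tf.reduce_sum(grad, ry), sy)   # shape [4, 3], unchanged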
Example #2
Source File: tensor_forest.py From lambda-packs with MIT License | 6 votes |
def _gini(self, class_counts):
  """Calculate the Gini impurity.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = 1 - sum_i ( c(i) / c )^2

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)
  return 1.0 - sum_squares / (sums * sums)
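A minimal standalone check of the Gini formula above, using the public tf.reduce_sum on made-up class counts (the +1 smoothing and the slice are omitted here for clarity):

import tensorflow as tf

class_counts = tf.constant([[10., 10.],   # evenly split: impurity 0.5
                            [19., 1.]])   # nearly pure:  impurity ~0.095
sums = tf.reduce_sum(class_counts, 1)
sum_squares = tf.reduce_sum(tf.square(class_counts), 1)
gini = 1.0 - sum_squares / (sums * sums)  # -> [0.5, 0.095]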
Example #3
Source File: tensor_forest.py From lambda-packs with MIT License | 6 votes |
def _variance(self, sums, squares):
  """Calculate the variance for each row of the input tensors.

  Variance is V = E[x^2] - (E[x])^2.

  Args:
    sums: A tensor containing output sums, usually a slice from
      variables.node_sums.  Should contain the number of examples seen
      in index 0 so we can calculate expected value.
    squares: Same as sums, but sums of squares.

  Returns:
    A 1-D tensor of the variances for each row in the input.
  """
  total_count = array_ops.slice(sums, [0, 0], [-1, 1])
  e_x = sums / total_count
  e_x2 = squares / total_count
  return math_ops.reduce_sum(e_x2 - math_ops.square(e_x), 1)
Example #4
Source File: tensor_forest.py From lambda-packs with MIT License | 6 votes |
def _weighted_gini(self, class_counts):
  """Our split score is the Gini impurity times the number of examples.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = c * (1 - sum_i ( c(i) / c )^2 )
          = c - sum_i c(i)^2 / c

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)
  return sums - sum_squares / sums
Example #5
Source File: dataset_ops.py From lambda-packs with MIT License | 6 votes |
def _estimate_data_distribution(c, num_examples_per_class_seen):
  """Estimate data distribution as labels are seen.

  Args:
    c: The class labels.  Type `int32`, shape `[batch_size]`.
    num_examples_per_class_seen: A `ResourceVariable` containing counts.
      Type `int64`, shape `[num_classes]`.

  Returns:
    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
  """
  num_classes = num_examples_per_class_seen.get_shape()[0].value
  # Update the class-count based on what labels are seen in
  # batch.  But do this asynchronously to avoid performing a
  # cross-device round-trip.  Just use the cached value.
  num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
      math_ops.reduce_sum(
          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
  init_prob_estimate = math_ops.truediv(
      num_examples_per_class_seen,
      math_ops.reduce_sum(num_examples_per_class_seen))
  return math_ops.cast(init_prob_estimate, dtypes.float32)
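The counting trick in this example, reduce_sum over a one-hot encoding of the labels, can be tried in isolation. The sketch below uses made-up labels and the public API:

import tensorflow as tf

labels = tf.constant([0, 2, 2, 1, 2])                              # batch of class ids
counts = tf.reduce_sum(tf.one_hot(labels, 3, dtype=tf.int64), 0)   # -> [1, 1, 3]
dist = tf.math.truediv(counts, tf.reduce_sum(counts))              # -> [0.2, 0.2, 0.6]
dist = tf.cast(dist, tf.float32)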
Example #6
Source File: backend.py From lambda-packs with MIT License | 6 votes |
def sum(x, axis=None, keepdims=False):
  """Sum of the values in a tensor, alongside the specified axis.

  Arguments:
      x: A tensor or variable.
      axis: An integer, the axis to sum over.
      keepdims: A boolean, whether to keep the dimensions or not.
          If `keepdims` is `False`, the rank of the tensor is reduced by 1.
          If `keepdims` is `True`, the reduced dimension is retained with
          length 1.

  Returns:
      A tensor with sum of `x`.
  """
  axis = _normalize_axis(axis, ndim(x))
  return math_ops.reduce_sum(x, reduction_indices=axis, keep_dims=keepdims)
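The Keras backend wrapper above simply forwards to reduce_sum. For reference, a small usage sketch with the public tf.keras.backend alias; this assumes a current release, where the old keep_dims/reduction_indices spellings have become keepdims/axis.

import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
K.sum(x, axis=1)                  # -> [6., 15.]
K.sum(x, axis=1, keepdims=True)   # -> [[6.], [15.]]
tf.reduce_sum(x, axis=1)          # equivalent call on the public API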
Example #7
Source File: array_grad.py From lambda-packs with MIT License | 6 votes |
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions."""
  assert isinstance(grad, ops.Tensor)
  input_shape = array_ops.shape(op.inputs[0])
  # We interleave multiples and input_shape to get split_shape,
  # reshape grad to split_shape, and reduce along all even
  # dimensions (the tiled dimensions) to get the result
  # with shape input_shape.  For example
  #   input_shape = [20, 30, 40]
  #   multiples = [2, 3, 4]
  #   split_shape = [2, 20, 3, 30, 4, 40]
  #   axes = [0, 2, 4]
  split_shape = array_ops.reshape(
      array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
  axes = math_ops.range(0, array_ops.size(split_shape), 2)
  input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
  # Fix shape inference
  input_grad.set_shape(op.inputs[0].get_shape())
  return [input_grad, None]
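The interleave-reshape-reduce trick above can be reproduced with the public API on concrete shapes. The sketch below sums the gradient of a [2, 3] tensor tiled with multiples [2, 4] back to the input shape; the names and shapes are made up for illustration.

import tensorflow as tf

input_shape = tf.constant([2, 3])
multiples = tf.constant([2, 4])
grad = tf.ones([4, 12])            # gradient in the tiled shape

split_shape = tf.reshape(
    tf.transpose(tf.stack([multiples, input_shape])), [-1])   # [2, 2, 4, 3]
axes = tf.range(0, tf.size(split_shape), 2)                   # [0, 2]
input_grad = tf.reduce_sum(tf.reshape(grad, split_shape), axes)
# input_grad has shape [2, 3]; every entry is 8.0 = 2 * 4 tiled copies summed.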
Example #8
Source File: gmm_ops.py From lambda-packs with MIT License | 6 votes |
def _define_partial_maximization_operation(self, shard_id, shard):
  """Computes the partial statistics of the means and covariances.

  Args:
    shard_id: current shard id.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  # Soft assignment of each data point to each of the two clusters.
  self._points_in_k[shard_id] = math_ops.reduce_sum(
      self._w[shard_id], 0, keep_dims=True)
  # Partial means.
  w_mul_x = array_ops.expand_dims(
      math_ops.matmul(
          self._w[shard_id], array_ops.squeeze(shard, [0]), transpose_a=True),
      1)
  self._w_mul_x.append(w_mul_x)
  # Partial covariances.
  x = array_ops.concat([shard for _ in range(self._num_classes)], 0)
  x_trans = array_ops.transpose(x, perm=[0, 2, 1])
  x_mul_w = array_ops.concat([
      array_ops.expand_dims(x_trans[k, :, :] * self._w[shard_id][:, k], 0)
      for k in range(self._num_classes)
  ], 0)
  self._w_mul_x2.append(math_ops.matmul(x_mul_w, x))
Example #9
Source File: multinomial.py From lambda-packs with MIT License | 6 votes |
def _sample_n(self, n, seed=None):
  n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32)
  if self.total_count.get_shape().ndims is not None:
    if self.total_count.get_shape().ndims != 0:
      raise NotImplementedError(
          "Sample only supported for scalar number of draws.")
  elif self.validate_args:
    is_scalar = check_ops.assert_rank(
        n_draws, 0,
        message="Sample only supported for scalar number of draws.")
    n_draws = control_flow_ops.with_dependencies([is_scalar], n_draws)
  k = self.event_shape_tensor()[0]
  # Flatten batch dims so logits has shape [B, k],
  # where B = reduce_prod(self.batch_shape_tensor()).
  draws = random_ops.multinomial(
      logits=array_ops.reshape(self.logits, [-1, k]),
      num_samples=n * n_draws,
      seed=seed)
  draws = array_ops.reshape(draws, shape=[-1, n, n_draws])
  x = math_ops.reduce_sum(array_ops.one_hot(draws, depth=k),
                          axis=-2)  # shape: [B, n, k]
  x = array_ops.transpose(x, perm=[1, 0, 2])
  final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
  return array_ops.reshape(x, final_shape)
Example #10
Source File: gmm_ops.py From lambda-packs with MIT License | 6 votes |
def _define_diag_covariance_probs(self, shard_id, shard):
  """Defines the diagonal covariance probabilities per example in a class.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.

  Returns a matrix num_examples * num_classes.
  """
  # num_classes X 1
  # TODO(xavigonzalvo): look into alternatives to log for
  # reparametrization of variance parameters.
  det_expanded = math_ops.reduce_sum(
      math_ops.log(self._covs + 1e-3), 1, keep_dims=True)
  diff = shard - self._means
  x2 = math_ops.square(diff)
  cov_expanded = array_ops.expand_dims(1.0 / (self._covs + 1e-3), 2)
  # num_classes X num_examples
  x2_cov = math_ops.matmul(x2, cov_expanded)
  x2_cov = array_ops.transpose(array_ops.squeeze(x2_cov, [2]))
  self._probs[shard_id] = -0.5 * (
      math_ops.to_float(self._dimensions) * math_ops.log(2.0 * np.pi) +
      array_ops.transpose(det_expanded) + x2_cov)
Example #11
Source File: gmm_ops.py From lambda-packs with MIT License | 6 votes |
def _covariance(x, diag):
  """Defines the covariance operation of a matrix.

  Args:
    x: a matrix Tensor. Dimension 0 should contain the number of examples.
    diag: if True, it computes the diagonal covariance.

  Returns:
    A Tensor representing the covariance of x. In the case of
    diagonal matrix just the diagonal is returned.
  """
  num_points = math_ops.to_float(array_ops.shape(x)[0])
  x -= math_ops.reduce_mean(x, 0, keep_dims=True)
  if diag:
    cov = math_ops.reduce_sum(
        math_ops.square(x), 0, keep_dims=True) / (num_points - 1)
  else:
    cov = math_ops.matmul(x, x, transpose_a=True) / (num_points - 1)
  return cov
Example #12
Source File: dirichlet_multinomial.py From lambda-packs with MIT License | 6 votes |
def _sample_n(self, n, seed=None):
  n_draws = math_ops.cast(self.total_count, dtype=dtypes.int32)
  k = self.event_shape_tensor()[0]
  unnormalized_logits = array_ops.reshape(
      math_ops.log(random_ops.random_gamma(
          shape=[n],
          alpha=self.concentration,
          dtype=self.dtype,
          seed=seed)),
      shape=[-1, k])
  draws = random_ops.multinomial(
      logits=unnormalized_logits,
      num_samples=n_draws,
      seed=distribution_util.gen_new_seed(seed, salt="dirichlet_multinomial"))
  x = math_ops.reduce_sum(array_ops.one_hot(draws, depth=k), -2)
  final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
  return array_ops.reshape(x, final_shape)
Example #13
Source File: gmm_ops.py From lambda-packs with MIT License | 6 votes |
def _define_full_covariance_probs(self, shard_id, shard):
  """Defines the full covariance probabilities per example in a class.

  Updates a matrix with dimension num_examples X num_classes.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  diff = shard - self._means
  cholesky = linalg_ops.cholesky(self._covs + self._min_var)
  log_det_covs = 2.0 * math_ops.reduce_sum(
      math_ops.log(array_ops.matrix_diag_part(cholesky)), 1)
  x_mu_cov = math_ops.square(
      linalg_ops.matrix_triangular_solve(
          cholesky, array_ops.transpose(
              diff, perm=[0, 2, 1]), lower=True))
  diag_m = array_ops.transpose(math_ops.reduce_sum(x_mu_cov, 1))
  self._probs[shard_id] = -0.5 * (diag_m + math_ops.to_float(self._dimensions)
                                  * math_ops.log(2 * np.pi) + log_det_covs)
Example #14
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _MaximumMinimumGrad(op, grad, selector_op):
  """Factor out the code for the gradient of Maximum or Minimum."""
  x = op.inputs[0]
  y = op.inputs[1]
  gdtype = grad.dtype
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  gradshape = array_ops.shape(grad)
  zeros = array_ops.zeros(gradshape, gdtype)
  xmask = selector_op(x, y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  xgrad = array_ops.where(xmask, grad, zeros)
  ygrad = array_ops.where(math_ops.logical_not(xmask), grad, zeros)
  gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx)
  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
  return (gx, gy)
Example #15
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _SquaredDifferenceGrad(op, grad):
  """Returns the gradient for (x-y)^2."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  # .op works with Tensors or IndexedSlices
  with ops.control_dependencies([grad.op]):
    # The parens ensure that if grad is IndexedSlices, it'll get multiplied by
    # Tensor (not a number like 2.0) which causes it to convert to Tensor.
    x_grad = math_ops.scalar_mul(2.0, grad) * (x - y)
  return (array_ops.reshape(math_ops.reduce_sum(x_grad, rx), sx),
          -array_ops.reshape(math_ops.reduce_sum(x_grad, ry), sy))


# Logical operations have no gradients.
Example #16
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _ZetaGrad(op, grad):
  """Returns gradient of zeta(x, q) with respect to x and q."""
  # TODO(tillahoffmann): Add derivative with respect to x
  x = op.inputs[0]
  q = op.inputs[1]
  # Broadcast gradients
  sx = array_ops.shape(x)
  sq = array_ops.shape(q)
  unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq)
  # Evaluate gradient
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    q = math_ops.conj(q)
    partial_q = -x * math_ops.zeta(x + 1, q)
    # TODO(b/36815900): Mark None return values as NotImplemented
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_q * grad, rq), sq))
Example #17
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _BetaincGrad(op, grad):
  """Returns gradient of betainc(a, b, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a, b
  a, b, x = op.inputs

  # two cases: x is a scalar and a/b are same-shaped tensors, or vice
  # versa; so it's sufficient to check against shape(a).
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  _, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because terms
  # can grow large.
  log_beta = (gen_math_ops.lgamma(a) + gen_math_ops.lgamma(b) -
              gen_math_ops.lgamma(a + b))
  partial_x = math_ops.exp(
      (b - 1) * math_ops.log(1 - x) + (a - 1) * math_ops.log(x) - log_beta)

  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,  # da
          None,  # db
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))
Example #18
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _MinOrMaxGrad(op, grad):
  """Gradient for Min or Max. Amazingly it's precisely the same code."""
  input_shape = array_ops.shape(op.inputs[0])
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  y = op.outputs[0]
  y = array_ops.reshape(y, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)

  # Compute the number of selected (maximum or minimum) elements in each
  # reduction dimension. If there are multiple minimum or maximum elements
  # then the gradient will be divided between them.
  indicators = math_ops.cast(math_ops.equal(y, op.inputs[0]), grad.dtype)
  num_selected = array_ops.reshape(
      math_ops.reduce_sum(indicators, op.inputs[1]), output_shape_kept_dims)

  return [math_ops.div(indicators, num_selected) * grad, None]
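A small check of the tie-splitting behaviour described in the comment, using autodiff on the public tf.reduce_max (one of the ops this gradient is registered for); this is a sketch, not part of the original module.

import tensorflow as tf

x = tf.constant([[3., 1., 3.]])
with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.reduce_max(x, axis=1)
tape.gradient(y, x)   # -> [[0.5, 0., 0.5]]: the two tied maxima share the gradient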
Example #19
Source File: categorical.py From lambda-packs with MIT License | 6 votes |
def _kl_categorical_categorical(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Categorical.

  Args:
    a: instance of a Categorical distribution object.
    b: instance of a Categorical distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_categorical_categorical".

  Returns:
    Batchwise KL(a || b)
  """
  with ops.name_scope(name, "kl_categorical_categorical",
                      values=[a.logits, b.logits]):
    # sum(probs log(probs / (1 - probs)))
    delta_log_probs1 = (nn_ops.log_softmax(a.logits) -
                        nn_ops.log_softmax(b.logits))
    return math_ops.reduce_sum(nn_ops.softmax(a.logits) * delta_log_probs1,
                               axis=-1)
Example #20
Source File: rnn_cell.py From lambda-packs with MIT License | 6 votes |
def _attention(self, query, attn_states):
  conv2d = nn_ops.conv2d
  reduce_sum = math_ops.reduce_sum
  softmax = nn_ops.softmax
  tanh = math_ops.tanh

  with vs.variable_scope("attention"):
    k = vs.get_variable(
        "attn_w", [1, 1, self._attn_size, self._attn_vec_size])
    v = vs.get_variable("attn_v", [self._attn_vec_size])
    hidden = array_ops.reshape(attn_states,
                               [-1, self._attn_length, 1, self._attn_size])
    hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME")
    y = _linear(query, self._attn_vec_size, True)
    y = array_ops.reshape(y, [-1, 1, 1, self._attn_vec_size])
    s = reduce_sum(v * tanh(hidden_features + y), [2, 3])
    a = softmax(s)
    d = reduce_sum(
        array_ops.reshape(a, [-1, self._attn_length, 1, 1]) * hidden, [1, 2])
    new_attns = array_ops.reshape(d, [-1, self._attn_size])
    new_attn_states = array_ops.slice(attn_states, [0, 1, 0], [-1, -1, -1])
    return new_attns, new_attn_states
Example #21
Source File: clustering_ops.py From lambda-packs with MIT License | 6 votes |
def _compute_euclidean_distance(cls, inputs, clusters):
  """Computes Euclidean distance between each input and each cluster center.

  Args:
    inputs: list of input Tensors.
    clusters: cluster Tensor.

  Returns:
    list of Tensors, where each element corresponds to each element in inputs.
    The value is the distance of each row to all the cluster centers.
  """
  output = []
  for inp in inputs:
    with ops.colocate_with(inp):
      # Computes Euclidean distance. Note the first and third terms are
      # broadcast additions.
      squared_distance = (math_ops.reduce_sum(
          math_ops.square(inp), 1, keep_dims=True) - 2 * math_ops.matmul(
              inp, clusters, transpose_b=True) + array_ops.transpose(
                  math_ops.reduce_sum(
                      math_ops.square(clusters), 1, keep_dims=True)))
      output.append(squared_distance)
  return output
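The three-term expression above is the expansion ||x - c||^2 = ||x||^2 - 2 x·c + ||c||^2 applied row-wise with broadcasting. A standalone sketch with made-up shapes, compared against the direct pairwise computation:

import tensorflow as tf

inp = tf.random.normal([5, 3])       # 5 points in 3-D
clusters = tf.random.normal([4, 3])  # 4 cluster centers

squared_distance = (
    tf.reduce_sum(tf.square(inp), 1, keepdims=True)            # [5, 1]
    - 2 * tf.matmul(inp, clusters, transpose_b=True)           # [5, 4]
    + tf.transpose(tf.reduce_sum(tf.square(clusters), 1, keepdims=True)))  # [1, 4]

# Same result computed directly, for comparison.
direct = tf.reduce_sum(tf.square(inp[:, None, :] - clusters[None, :, :]), -1)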
Example #22
Source File: losses_impl.py From lambda-packs with MIT License | 5 votes |
def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return _safe_div(total_loss, num_present)
Example #23
Source File: seq2seq.py From lambda-packs with MIT License | 5 votes |
def sequence_loss(logits, targets, weights,
                  average_across_timesteps=True, average_across_batch=True,
                  softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    softmax_loss_function: Function (labels, logits) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
      **Note that to avoid confusion, it is required for the function to accept
      named arguments.**
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  with ops.name_scope(name, "sequence_loss", logits + targets + weights):
    cost = math_ops.reduce_sum(
        sequence_loss_by_example(
            logits, targets, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
    if average_across_batch:
      batch_size = array_ops.shape(targets[0])[0]
      return cost / math_ops.cast(batch_size, cost.dtype)
    else:
      return cost
Example #24
Source File: nn_grad.py From lambda-packs with MIT License | 5 votes |
def _LogSoftmaxGrad(op, grad):
  """The gradient for log_softmax.

      log_softmax = input - log(sum(exp(input)))
      dlog_softmax/dinput = diag - softmax(input)

  Args:
    op: The log softmax op.
    grad: The tensor representing the gradient w.r.t. the output.

  Returns:
    The gradients w.r.t. the input.
  """
  softmax = math_ops.exp(op.outputs[0])
  return grad - math_ops.reduce_sum(grad, 1, keep_dims=True) * softmax
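A quick numerical check of this closed form against autodiff, using the public tf.nn.log_softmax and a GradientTape; this is a sketch, not part of the original module.

import tensorflow as tf

x = tf.random.normal([2, 5])
upstream = tf.random.normal([2, 5])      # stand-in for `grad`

with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.nn.log_softmax(x)
auto_grad = tape.gradient(y, x, output_gradients=upstream)

softmax = tf.exp(tf.nn.log_softmax(x))
manual_grad = upstream - tf.reduce_sum(upstream, 1, keepdims=True) * softmax
# auto_grad and manual_grad agree up to floating-point error.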
Example #25
Source File: eval_metrics.py From lambda-packs with MIT License | 5 votes |
def _r2(probabilities, targets, weights=None):
  if targets.get_shape().ndims == 1:
    targets = array_ops.expand_dims(targets, -1)
  targets = math_ops.to_float(targets)
  y_mean = math_ops.reduce_mean(targets, 0)
  squares_total = math_ops.reduce_sum(math_ops.square(targets - y_mean), 0)
  squares_residuals = math_ops.reduce_sum(
      math_ops.square(targets - probabilities), 0)
  score = 1 - math_ops.reduce_sum(squares_residuals / squares_total)
  return metric_ops.streaming_mean(score, weights=weights)
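The R^2 score itself, 1 - SS_res / SS_tot, can be computed directly; a minimal sketch with made-up targets and predictions, skipping the streaming-metric wrapper:

import tensorflow as tf

targets = tf.constant([[1.], [2.], [3.], [4.]])
predictions = tf.constant([[1.1], [1.9], [3.2], [3.8]])

y_mean = tf.reduce_mean(targets, 0)
squares_total = tf.reduce_sum(tf.square(targets - y_mean), 0)
squares_residuals = tf.reduce_sum(tf.square(targets - predictions), 0)
r2 = 1 - tf.reduce_sum(squares_residuals / squares_total)   # 0.98, close to a perfect fit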
Example #26
Source File: math_grad.py From lambda-packs with MIT License | 5 votes |
def _ComplexGrad(op, grad):
  """Returns the real and imaginary components of 'grad', respectively."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.real(grad), rx), sx),
          array_ops.reshape(math_ops.reduce_sum(math_ops.imag(grad), ry), sy))
Example #27
Source File: gmm_ops.py From lambda-packs with MIT License | 5 votes |
def _define_loglikelihood_operation(self):
  """Defines the total log-likelihood of current iteration."""
  self._ll_op = []
  for prior_probs in self._prior_probs:
    self._ll_op.append(math_ops.reduce_sum(math_ops.log(prior_probs)))
  summary.scalar('ll', math_ops.reduce_sum(self._ll_op))
Example #28
Source File: metric_ops.py From lambda-packs with MIT License | 5 votes |
def _broadcast_weights(weights, values):
  """Broadcast `weights` to the same shape as `values`.

  This returns a version of `weights` following the same broadcast rules as
  `mul(weights, values)`. When computing a weighted average, use this function
  to broadcast `weights` before summing them; e.g.,
  `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.

  Args:
    weights: `Tensor` whose rank is either 0, or the same rank as `values`, and
      must be broadcastable to `values` (i.e., all dimensions must be either
      `1`, or the same as the corresponding `values` dimension).
    values: `Tensor` of any shape.

  Returns:
    `weights` broadcast to `values` shape.
  """
  with ops.name_scope(None, 'broadcast_weights', (values, weights)) as scope:
    weights_shape = weights.get_shape()
    values_shape = values.get_shape()
    if (weights_shape.is_fully_defined() and
        values_shape.is_fully_defined() and
        weights_shape.is_compatible_with(values_shape)):
      return weights
    with ops.control_dependencies((_assert_weights_rank(weights, values),)):
      return math_ops.multiply(
          weights, array_ops.ones_like(values), name=scope)
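The weighted-average recipe from the docstring, reduce_sum(w * v) / reduce_sum(broadcast(w, v)), in a minimal standalone form where the broadcast is done with ones_like as in the helper above:

import tensorflow as tf

values = tf.constant([[1., 2.], [3., 4.]])
weights = tf.constant(0.5)                       # scalar weight, broadcast to values' shape

weighted_sum = tf.reduce_sum(weights * values)                  # 5.0
weight_total = tf.reduce_sum(weights * tf.ones_like(values))    # 2.0
weighted_mean = weighted_sum / weight_total                     # 2.5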
Example #29
Source File: classification.py From lambda-packs with MIT License | 5 votes |
def accuracy(predictions, labels, weights=None, name=None):
  """Computes the percentage of times that predictions matches labels.

  Args:
    predictions: the predicted values, a `Tensor` whose dtype and shape
                 matches 'labels'.
    labels: the ground truth values, a `Tensor` of any shape and
            bool, integer, or string dtype.
    weights: None or `Tensor` of float values to reweight the accuracy.
    name: A name for the operation (optional).

  Returns:
    Accuracy `Tensor`.

  Raises:
    ValueError: if dtypes don't match or
      if dtype is not bool, integer, or string.
  """
  if not (labels.dtype.is_integer or
          labels.dtype in (dtypes.bool, dtypes.string)):
    raise ValueError(
        'Labels should have bool, integer, or string dtype, not %r' %
        labels.dtype)
  if not labels.dtype.is_compatible_with(predictions.dtype):
    raise ValueError('Dtypes of predictions and labels should match. '
                     'Given: predictions (%r) and labels (%r)' %
                     (predictions.dtype, labels.dtype))
  with ops.name_scope(name, 'accuracy', values=[predictions, labels]):
    is_correct = math_ops.cast(
        math_ops.equal(predictions, labels), dtypes.float32)
    if weights is not None:
      is_correct = math_ops.multiply(is_correct, weights)
      num_values = math_ops.multiply(weights, array_ops.ones_like(is_correct))
      return math_ops.div(math_ops.reduce_sum(is_correct),
                          math_ops.reduce_sum(num_values))
    return math_ops.reduce_mean(is_correct)
Example #30
Source File: nn_grad.py From lambda-packs with MIT License | 5 votes |
def _SoftmaxGrad(op, grad_softmax):
  """The derivative of the softmax nonlinearity.

  We assume that probs is of shape [batch_size * dim]
  The formula for dsoftmax / dx = (diag(softmax) - softmax * softmax').
  This matrix is diagonal minus a rank one matrix, so it is easy to
  implement as follows:

    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax

  Args:
     op: the Softmax op.
     grad_softmax: the tensor representing the gradient w.r.t. the
       softmax output.

  Returns:
     gradient w.r.t the input to the softmax
  """
  # TODO(ilyasu): assert that the tensor has two dimensions at
  # graph-construction time?  Alternatively: do different things
  # depending on the dimensionality of the input tensors.
  softmax = op.outputs[0]
  grad_x = ((grad_softmax - array_ops.reshape(
      math_ops.reduce_sum(grad_softmax * softmax, [1]), [-1, 1])) * softmax)
  return grad_x
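The same closed form written against the public ops; here `softmax` stands in for the op's cached output and the reshape keeps the per-row sum broadcastable. A sketch on made-up shapes, not the registered gradient itself.

import tensorflow as tf

logits = tf.random.normal([3, 4])
softmax = tf.nn.softmax(logits)            # plays the role of op.outputs[0]
grad_softmax = tf.random.normal([3, 4])    # upstream gradient

grad_x = (grad_softmax - tf.reshape(
    tf.reduce_sum(grad_softmax * softmax, [1]), [-1, 1])) * softmax
# grad_x is the Jacobian-vector product of the softmax with grad_softmax,
# i.e. what autodiff would propagate back to the logits.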