Python tensorflow.python.ops.array_ops.gather() Examples
The following are 30 code examples of tensorflow.python.ops.array_ops.gather(), drawn from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.python.ops.array_ops.
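Before the project examples, a minimal usage sketch of gather (written for this page, not taken from any of the projects below): it selects rows of params in the order given by indices. It assumes a TF 1.x graph/session environment, which is what all of the snippets here target; tf.gather is the public alias of array_ops.gather.

# Minimal gather sketch: pick rows 2 and 0 of `params`, in that order.
import tensorflow as tf
from tensorflow.python.ops import array_ops

params = tf.constant([[1.0, 2.0],
                      [3.0, 4.0],
                      [5.0, 6.0]])
indices = tf.constant([2, 0])

rows = array_ops.gather(params, indices)   # same op as tf.gather(params, indices)

with tf.Session() as sess:
    print(sess.run(rows))                  # [[5. 6.] [1. 2.]]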
Example #1
Source File: operator_pd.py From auto-alt-text-lambda-api with MIT License

def vector_space_dimension(self, name="vector_space_dimension"):
  """Dimension of vector space on which this acts.  The `k` in `R^k`.

  If this operator represents the batch matrix `A` with
  `A.shape = [N1,...,Nn, k, k]`, the `vector_space_dimension` is `k`.

  Args:
    name:  A name scope to use for ops added by this method.

  Returns:
    `int32` `Tensor`
  """
  # Derived classes get this "for free" once .shape() is implemented.
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=self.inputs):
      return array_ops.gather(self.shape(), self.rank() - 1)
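Example #1 reads a single dimension out of a dynamically computed shape vector with gather; several later examples (#10, #12, #22) use the same trick. A standalone sketch of the pattern, using an illustrative placeholder of my own rather than the operator class above:

# Read the last dimension of a tensor whose rank is only known at run time.
import tensorflow as tf
from tensorflow.python.ops import array_ops

x = tf.placeholder(tf.float32, shape=None)       # shape (and rank) unknown at build time
shape = array_ops.shape(x)                       # 1-D int32 vector of dimensions
rank = array_ops.rank(x)
last_dim = array_ops.gather(shape, rank - 1)     # same trick as gather(self.shape(), self.rank() - 1)

with tf.Session() as sess:
    print(sess.run(last_dim, feed_dict={x: [[1., 2., 3.], [4., 5., 6.]]}))  # 3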
Example #2
Source File: math_grad.py From lambda-packs with MIT License

def _SegmentMinOrMaxGrad(op, grad, is_sorted):
  """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code."""
  zeros = array_ops.zeros(array_ops.shape(op.inputs[0]),
                          dtype=op.inputs[0].dtype)

  # Get the number of selected (minimum or maximum) elements in each segment.
  gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
  is_selected = math_ops.equal(op.inputs[0], gathered_outputs)

  if is_sorted:
    num_selected = math_ops.segment_sum(math_ops.cast(is_selected, grad.dtype),
                                        op.inputs[1])
  else:
    num_selected = math_ops.unsorted_segment_sum(
        math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2])

  # Compute the gradient for each segment. The gradient for the ith segment is
  # divided evenly among the selected elements in that segment.
  weighted_grads = math_ops.div(grad, num_selected)
  gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])

  if is_sorted:
    return array_ops.where(is_selected, gathered_grads, zeros), None
  else:
    return array_ops.where(is_selected, gathered_grads, zeros), None, None
Example #3
Source File: gradients_impl.py From lambda-packs with MIT License

def _MultiDeviceAddN(tensor_list):
  """Adds tensors from potentially multiple devices."""
  # Basic function structure comes from control_flow_ops.group().
  # Sort tensors according to their devices.
  tensors_on_device = collections.defaultdict(lambda: [])
  for tensor in tensor_list:
    tensors_on_device[tensor.device].append(tensor)

  # For each device, add the tensors on that device first.
  # Then gather the partial sums from multiple devices.
  # TODO(sjhwang): Create hierarchical aggregation tree as pbar's suggestion.
  # E.g., aggregate per GPU, then per task, and so on.
  summands = []

  def DeviceKey(dev):
    return "" if dev is None else dev

  for dev in sorted(six.iterkeys(tensors_on_device), key=DeviceKey):
    tensors = tensors_on_device[dev]
    with ops.colocate_with(tensors[0].op, ignore_existing=True):
      summands.append(math_ops.add_n(tensors))

  return math_ops.add_n(summands)
Example #4
Source File: data_flow_grad.py From lambda-packs with MIT License

def _DynamicStitchGrads(op, grad):
  """Gradients for DynamicStitch."""

  num_values = len(op.inputs) // 2
  indices_grad = [None] * num_values

  def AsInt32(x):
    return (x if op.inputs[0].dtype == dtypes.int32
            else math_ops.cast(x, dtypes.int32))

  inputs = [AsInt32(op.inputs[i]) for i in xrange(num_values)]
  if isinstance(grad, ops.IndexedSlices):
    output_shape = array_ops.shape(op.outputs[0])
    output_rows = output_shape[0]
    grad = math_ops.unsorted_segment_sum(grad.values, grad.indices, output_rows)
  values_grad = [array_ops.gather(grad, inp) for inp in inputs]
  return indices_grad + values_grad
Example #5
Source File: optimizer.py From tensorflow-XNN with MIT License

def _apply_sparse(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
    eps = 1e-7  # cap for moving average

    m = self.get_slot(var, "m")
    m_slice = tf.gather(m, grad.indices)
    m_t = state_ops.scatter_update(m, grad.indices,
                                   tf.maximum(beta_t * m_slice + eps,
                                              tf.abs(grad.values)))
    m_t_slice = tf.gather(m_t, grad.indices)

    # Update 'ref' by subtracting 'value'.
    var_update = state_ops.scatter_sub(var, grad.indices,
                                       lr_t * grad.values * tf.exp(
                                           tf.log(alpha_t) * tf.sign(grad.values) *
                                           tf.sign(m_t_slice)))

    # Create an op that groups multiple operations.
    # When this op finishes, all ops in input have finished.
    return control_flow_ops.group(*[var_update, m_t])
Example #6
Source File: tensor_forest.py From lambda-packs with MIT License

def _gini(self, class_counts):
  """Calculate the Gini impurity.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = 1 - sum_i ( c(i) / c )^2

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)

  return 1.0 - sum_squares / (sums * sums)
Example #7
Source File: tensor_forest.py From lambda-packs with MIT License

def _weighted_gini(self, class_counts):
  """Our split score is the Gini impurity times the number of examples.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = c * (1 - sum_i ( c(i) / c )^2 )
          = c - sum_i c(i)^2 / c

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)

  return sums - sum_squares / sums
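Examples #6 and #7 compute a smoothed Gini impurity from per-row class counts: the slice skips column 0 of class_counts and adds 1 to each remaining count, then _gini returns 1 - sum_i (c(i)/c)^2 and _weighted_gini returns c times that quantity. A small NumPy sanity check of the two formulas (not part of the TensorFlow source; the counts are made up):

# Numeric check of the smoothed Gini formulas above.
import numpy as np

class_counts = np.array([[10.0, 3.0, 1.0, 0.0],   # column 0 is skipped by the slice
                         [ 7.0, 2.0, 2.0, 2.0]])

smoothed = 1.0 + class_counts[:, 1:]               # mirrors array_ops.slice(..., [0, 1], [-1, -1])
sums = smoothed.sum(axis=1)                        # c = sum_i c(i)
sum_squares = (smoothed ** 2).sum(axis=1)          # sum_i c(i)^2

gini = 1.0 - sum_squares / (sums * sums)           # 1 - sum_i (c(i)/c)^2, as in _gini()
weighted_gini = sums - sum_squares / sums          # c * (1 - sum_i (c(i)/c)^2), as in _weighted_gini()

print(gini)
print(weighted_gini)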
Example #8
Source File: tensor_forest.py From lambda-packs with MIT License

def average_impurity(self):
  """Constructs a TF graph for evaluating the average leaf impurity of a tree.

  If in regression mode, this is the leaf variance. If in classification mode,
  this is the gini impurity.

  Returns:
    The last op in the graph.
  """
  children = array_ops.squeeze(array_ops.slice(
      self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
  is_leaf = math_ops.equal(constants.LEAF_NODE, children)
  leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                               squeeze_dims=[1]))
  counts = array_ops.gather(self.variables.node_sums, leaves)
  gini = self._weighted_gini(counts)

  # Guard against step 1, when there often are no leaves yet.
  def impurity():
    return gini

  # Since average impurity can be used for loss, when there's no data just
  # return a big number so that loss always decreases.
  def big():
    return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.

  return control_flow_ops.cond(math_ops.greater(
      array_ops.shape(leaves)[0], 0), impurity, big)
Example #9
Source File: sdca_ops.py From lambda-packs with MIT License

def _linear_predictions(self, examples):
  """Returns predictions of the form w*x."""
  with name_scope('sdca/prediction'):
    sparse_variables = self._convert_n_to_tensor(self._variables[
        'sparse_features_weights'])
    result = 0.0
    for sfc, sv in zip(examples['sparse_features'], sparse_variables):
      # TODO(sibyl-Aix6ihai): following does not take care of missing features.
      result += math_ops.segment_sum(
          math_ops.multiply(
              array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
          sfc.example_indices)
    dense_features = self._convert_n_to_tensor(examples['dense_features'])
    dense_variables = self._convert_n_to_tensor(self._variables[
        'dense_features_weights'])

    for i in range(len(dense_variables)):
      result += math_ops.matmul(dense_features[i],
                                array_ops.expand_dims(dense_variables[i], -1))

  # Reshaping to allow shape inference at graph construction time.
  return array_ops.reshape(result, [-1])
Example #10
Source File: operator_pd_identity.py From lambda-packs with MIT License

def _check_shape(self, shape):
  """Check that the init arg `shape` defines a valid operator."""
  shape = ops.convert_to_tensor(shape, name="shape")
  if not self._verify_pd:
    return shape

  # Further checks are equivalent to verification that this is positive
  # definite.  Why?  Because the further checks simply check that this is a
  # square matrix, and combining the fact that this is square (and thus maps
  # a vector space R^k onto itself), with the behavior of .matmul(), this must
  # be the identity operator.
  rank = array_ops.size(shape)
  assert_matrix = check_ops.assert_less_equal(2, rank)
  with ops.control_dependencies([assert_matrix]):
    last_dim = array_ops.gather(shape, rank - 1)
    second_to_last_dim = array_ops.gather(shape, rank - 2)
    assert_square = check_ops.assert_equal(last_dim, second_to_last_dim)
    return control_flow_ops.with_dependencies([assert_matrix, assert_square],
                                              shape)
Example #11
Source File: operator_pd.py From lambda-packs with MIT License

def vector_space_dimension(self, name="vector_space_dimension"):
  """Dimension of vector space on which this acts.  The `k` in `R^k`.

  If this operator represents the batch matrix `A` with
  `A.shape = [N1,...,Nn, k, k]`, the `vector_space_dimension` is `k`.

  Args:
    name:  A name scope to use for ops added by this method.

  Returns:
    `int32` `Tensor`
  """
  # Derived classes get this "for free" once .shape() is implemented.
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=self.inputs):
      return array_ops.gather(self.shape(), self.rank() - 1)
Example #12
Source File: operator_pd_vdvt_update.py From lambda-packs with MIT License

def _get_identity_operator(self, v):
  """Get an `OperatorPDIdentity` to play the role of `D` in `VDV^T`."""
  with ops.name_scope("get_identity_operator", values=[v]):
    if v.get_shape().is_fully_defined():
      v_shape = v.get_shape().as_list()
      v_batch_shape = v_shape[:-2]
      r = v_shape[-1]
      id_shape = v_batch_shape + [r, r]
    else:
      v_shape = array_ops.shape(v)
      v_rank = array_ops.rank(v)
      v_batch_shape = array_ops.strided_slice(v_shape, [0], [v_rank - 2])
      r = array_ops.gather(v_shape, v_rank - 1)  # Last dim of v
      id_shape = array_ops.concat((v_batch_shape, [r, r]), 0)
    return operator_pd_identity.OperatorPDIdentity(
        id_shape, v.dtype, verify_pd=self._verify_pd)
Example #13
Source File: optimizer.py From tensorflow-XNN with MIT License

def _apply_sparse(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    eps = 1e-7  # cap for moving average

    m = self.get_slot(var, "m")
    m_slice = tf.gather(m, grad.indices)
    m_t = state_ops.scatter_update(m, grad.indices,
                                   tf.maximum(beta_t * m_slice + eps,
                                              tf.abs(grad.values)))
    m_t_slice = tf.gather(m_t, grad.indices)

    var_update = state_ops.scatter_sub(var, grad.indices,
                                       lr_t * grad.values *
                                       (1.0 + alpha_t * tf.sign(grad.values) *
                                        tf.sign(m_t_slice)))

    # Create an op that groups multiple operations.
    # When this op finishes, all ops in input have finished.
    return control_flow_ops.group(*[var_update, m_t])
Example #14
Source File: sdca_ops.py From auto-alt-text-lambda-api with MIT License

def _linear_predictions(self, examples):
  """Returns predictions of the form w*x."""
  with name_scope('sdca/prediction'):
    sparse_variables = self._convert_n_to_tensor(self._variables[
        'sparse_features_weights'])
    result = 0.0
    for sfc, sv in zip(examples['sparse_features'], sparse_variables):
      # TODO(sibyl-Aix6ihai): following does not take care of missing features.
      result += math_ops.segment_sum(
          math_ops.multiply(
              array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
          sfc.example_indices)
    dense_features = self._convert_n_to_tensor(examples['dense_features'])
    dense_variables = self._convert_n_to_tensor(self._variables[
        'dense_features_weights'])

    for i in range(len(dense_variables)):
      result += math_ops.matmul(dense_features[i],
                                array_ops.expand_dims(dense_variables[i], -1))

  # Reshaping to allow shape inference at graph construction time.
  return array_ops.reshape(result, [-1])
Example #15
Source File: tensor_forest.py From auto-alt-text-lambda-api with MIT License

def average_impurity(self):
  """Constructs a TF graph for evaluating the average leaf impurity of a tree.

  If in regression mode, this is the leaf variance. If in classification mode,
  this is the gini impurity.

  Returns:
    The last op in the graph.
  """
  children = array_ops.squeeze(array_ops.slice(
      self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
  is_leaf = math_ops.equal(constants.LEAF_NODE, children)
  leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                               squeeze_dims=[1]))
  counts = array_ops.gather(self.variables.node_sums, leaves)
  gini = self._weighted_gini(counts)

  # Guard against step 1, when there often are no leaves yet.
  def impurity():
    return gini

  # Since average impurity can be used for loss, when there's no data just
  # return a big number so that loss always decreases.
  def big():
    return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.

  return control_flow_ops.cond(math_ops.greater(
      array_ops.shape(leaves)[0], 0), impurity, big)
Example #16
Source File: tensor_forest.py From auto-alt-text-lambda-api with MIT License

def _weighted_gini(self, class_counts):
  """Our split score is the Gini impurity times the number of examples.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = c * (1 - sum_i ( c(i) / c )^2 )
          = c - sum_i c(i)^2 / c

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)

  return sums - sum_squares / sums
Example #17
Source File: tensor_forest.py From auto-alt-text-lambda-api with MIT License

def _gini(self, class_counts):
  """Calculate the Gini impurity.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = 1 - sum_i ( c(i) / c )^2

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)

  return 1.0 - sum_squares / (sums * sums)
Example #18
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License

def _SegmentMinOrMaxGrad(op, grad):
  """Gradient for SegmentMin and SegmentMax. Both share the same code."""
  zeros = array_ops.zeros(
      array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype)

  # Get the number of selected (minimum or maximum) elements in each segment.
  gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
  is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
  num_selected = math_ops.segment_sum(
      math_ops.cast(is_selected, grad.dtype), op.inputs[1])

  # Compute the gradient for each segment. The gradient for the ith segment is
  # divided evenly among the selected elements in that segment.
  weighted_grads = math_ops.div(grad, num_selected)
  gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])

  return array_ops.where(is_selected, gathered_grads, zeros), None
Example #19
Source File: data_flow_grad.py From auto-alt-text-lambda-api with MIT License

def _DynamicStitchGrads(op, grad):
  """Gradients for DynamicStitch."""

  num_values = len(op.inputs) // 2
  indices_grad = [None] * num_values

  def AsInt32(x):
    return (x if op.inputs[0].dtype == dtypes.int32
            else math_ops.cast(x, dtypes.int32))

  inputs = [AsInt32(op.inputs[i]) for i in xrange(num_values)]
  if isinstance(grad, ops.IndexedSlices):
    output_shape = array_ops.shape(op.outputs[0])
    output_rows = output_shape[0]
    grad = math_ops.unsorted_segment_sum(grad.values, grad.indices, output_rows)
  values_grad = [array_ops.gather(grad, inp) for inp in inputs]
  return indices_grad + values_grad
Example #20
Source File: gradients_impl.py From auto-alt-text-lambda-api with MIT License

def _MultiDeviceAddN(tensor_list):
  """Adds tensors from potentially multiple devices."""
  # Basic function structure comes from control_flow_ops.group().
  # Sort tensors according to their devices.
  tensors_on_device = collections.defaultdict(lambda: [])
  for tensor in tensor_list:
    tensors_on_device[tensor.device].append(tensor)

  # For each device, add the tensors on that device first.
  # Then gather the partial sums from multiple devices.
  # TODO(sjhwang): Create hierarchical aggregation tree as pbar's suggestion.
  # E.g., aggregate per GPU, then per task, and so on.
  summands = []

  def DeviceKey(dev):
    return "" if dev is None else dev

  for dev in sorted(six.iterkeys(tensors_on_device), key=DeviceKey):
    tensors = tensors_on_device[dev]
    with ops.colocate_with(tensors[0].op, ignore_existing=True):
      summands.append(math_ops.add_n(tensors))

  return math_ops.add_n(summands)
Example #21
Source File: variable_clipping_optimizer.py From lambda-packs with MIT License

def _clip_sparse(self, grad, var):
  assert isinstance(grad, ops.IndexedSlices)
  clip_dims = self._vars_to_clip_dims[var]
  if 0 in clip_dims:
    logging.warning("Clipping norm across dims %s for %s is inefficient "
                    "when including sparse dimension 0.", clip_dims,
                    var.op.name)
    return self._clip_dense(var)

  with ops.colocate_with(var):
    var_subset = array_ops.gather(var, grad.indices)
  with self._maybe_colocate_with(var):
    normalized_var_subset = clip_ops.clip_by_norm(
        var_subset, self._max_norm, clip_dims)
    delta = ops.IndexedSlices(
        var_subset - normalized_var_subset, grad.indices, grad.dense_shape)
  with ops.colocate_with(var):
    return var.scatter_sub(delta, use_locking=self._use_locking)
Example #22
Source File: operator_pd_cholesky.py From lambda-packs with MIT License

def _check_chol(self, chol):
  """Verify that `chol` is proper."""
  chol = ops.convert_to_tensor(chol, name="chol")
  if not self.verify_pd:
    return chol

  shape = array_ops.shape(chol)
  rank = array_ops.rank(chol)

  is_matrix = check_ops.assert_rank_at_least(chol, 2)
  is_square = check_ops.assert_equal(
      array_ops.gather(shape, rank - 2), array_ops.gather(shape, rank - 1))

  deps = [is_matrix, is_square]
  diag = array_ops.matrix_diag_part(chol)
  deps.append(check_ops.assert_positive(diag))

  return control_flow_ops.with_dependencies(deps, chol)
Example #23
Source File: sparse_ops.py From auto-alt-text-lambda-api with MIT License

def sparse_retain(sp_input, to_retain):
  """Retains specified non-empty values within a `SparseTensor`.

  For example, if `sp_input` has shape `[4, 5]` and 4 non-empty string values:

      [0, 1]: a
      [0, 3]: b
      [2, 0]: c
      [3, 1]: d

  and `to_retain = [True, False, False, True]`, then the output will
  be a `SparseTensor` of shape `[4, 5]` with 2 non-empty values:

      [0, 1]: a
      [3, 1]: d

  Args:
    sp_input: The input `SparseTensor` with `N` non-empty elements.
    to_retain: A bool vector of length `N` with `M` true values.

  Returns:
    A `SparseTensor` with the same shape as the input and `M` non-empty
    elements corresponding to the true positions in `to_retain`.

  Raises:
    TypeError: If `sp_input` is not a `SparseTensor`.
  """
  sp_input = _convert_to_sparse_tensor(sp_input)

  to_retain = ops.convert_to_tensor(to_retain)

  # Shape checking, if shape is known at graph construction time
  retain_shape = to_retain.get_shape()
  retain_shape.assert_has_rank(1)
  sp_input.values.get_shape()[0].merge_with(retain_shape[0])

  where_true = array_ops.reshape(array_ops.where(to_retain), [-1])
  new_indices = array_ops.gather(sp_input.indices, where_true)
  new_values = array_ops.gather(sp_input.values, where_true)
  return sparse_tensor.SparseTensor(new_indices, new_values,
                                    array_ops.identity(sp_input.dense_shape))
Example #24
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License

def _UnsortedSegmentSumGrad(op, grad):
  """Gradient for SegmentSum."""
  return array_ops.gather(grad, op.inputs[1]), None, None
Example #25
Source File: embedding_ops.py From lambda-packs with MIT License

def _do_gather(params, ids, name=None):
  """Deals with doing gather differently for resource variables."""
  if isinstance(params, resource_variable_ops.ResourceVariable):
    return params.sparse_read(ids, name=name)
  return array_ops.gather(params, ids, name=name)
Example #26
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License

def _SegmentMeanGrad(op, grad):
  """Gradient for SegmentMean."""
  input_rank = array_ops.rank(op.inputs[0])
  ones_shape = array_ops.concat([
      array_ops.shape(op.inputs[1]),
      array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)
  ], 0)
  ones = array_ops.fill(ones_shape, constant_op.constant(1, dtype=grad.dtype))
  scaled_grad = math_ops.div(grad, math_ops.segment_sum(ones, op.inputs[1]))
  return array_ops.gather(scaled_grad, op.inputs[1]), None
Example #27
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License

def _SegmentSumGrad(op, grad):
  """Gradient for SegmentSum."""
  return array_ops.gather(grad, op.inputs[1]), None
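The one-line gradient above is the whole story for SegmentSum: each segment's upstream gradient is routed back to every element of that segment, which is exactly a gather by the segment ids. A small numeric check in TF 1.x graph mode (not from the source; the values are arbitrary):

# Check that d(segment_sum(x, ids))/dx == gather(upstream_grad, ids).
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
ids = tf.constant([0, 0, 1])
y = tf.segment_sum(x, ids)                 # [1+2, 3] = [3., 3.]

upstream = tf.constant([10.0, 20.0])
loss = tf.reduce_sum(upstream * y)
(grad_x,) = tf.gradients(loss, [x])

with tf.Session() as sess:
    print(sess.run(grad_x))                # [10. 10. 20.] == gather(upstream, ids)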
Example #28
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License

def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # The gradient can be expressed by dividing the product by each entry of the
  # input tensor, but this approach can't deal with zeros in the input.
  # Here, we avoid this problem by composing the output as a product of two
  # cumprod operations.

  input_shape = array_ops.shape(op.inputs[0])
  # Reshape reduction indices for the case where the parameter is a scalar
  reduction_indices = array_ops.reshape(op.inputs[1], [-1])

  # Expand grad to full input shape
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  grad = array_ops.tile(grad, tile_scaling)

  # Pack all reduced dimensions into a single one, so we can perform the
  # cumprod ops. If the reduction dims list is empty, it defaults to float32,
  # so we need to cast here.  We put all the shape-related ops on CPU to avoid
  # copying back and forth, and since listdiff is CPU only.
  with ops.device("/cpu:0"):
    reduced = math_ops.cast(reduction_indices, dtypes.int32)
    idx = math_ops.range(0, array_ops.rank(op.inputs[0]))
    other, _ = array_ops.setdiff1d(idx, reduced)
    perm = array_ops.concat([reduced, other], 0)
    reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
    other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
  permuted = array_ops.transpose(op.inputs[0], perm)
  permuted_shape = array_ops.shape(permuted)
  reshaped = array_ops.reshape(permuted, (reduced_num, other_num))

  # Calculate product, leaving out the current entry
  left = math_ops.cumprod(reshaped, axis=0, exclusive=True)
  right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True)
  y = array_ops.reshape(left * right, permuted_shape)

  # Invert the transpose and reshape operations.
  # Make sure to set the statically known shape information through a reshape.
  out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm))
  return array_ops.reshape(out, input_shape), None
Example #29
Source File: gradients_impl.py From auto-alt-text-lambda-api with MIT License

def _HandleNestedIndexedSlices(grad):
  assert isinstance(grad, ops.IndexedSlices)
  if isinstance(grad.values, ops.Tensor):
    return grad
  else:
    assert isinstance(grad.values, ops.IndexedSlices)
    g = _HandleNestedIndexedSlices(grad.values)
    return ops.IndexedSlices(g.values,
                             array_ops.gather(grad.indices, g.indices),
                             g.dense_shape)
Example #30
Source File: lazy_adam_optimizer.py From lambda-packs with MIT License

def _apply_sparse(self, grad, var):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t = state_ops.scatter_update(m, grad.indices,
                                 beta1_t * array_ops.gather(m, grad.indices) +
                                 (1 - beta1_t) * grad.values,
                                 use_locking=self._use_locking)

  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t = state_ops.scatter_update(v, grad.indices,
                                 beta2_t * array_ops.gather(v, grad.indices) +
                                 (1 - beta2_t) * math_ops.square(grad.values),
                                 use_locking=self._use_locking)

  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  m_t_slice = array_ops.gather(m_t, grad.indices)
  v_t_slice = array_ops.gather(v_t, grad.indices)
  denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
  var_update = state_ops.scatter_sub(var, grad.indices,
                                     lr * m_t_slice / denominator_slice,
                                     use_locking=self._use_locking)
  return control_flow_ops.group(var_update, m_t, v_t)