Python tensorflow.IndexedSlices() Examples
The following are 30 code examples of tensorflow.IndexedSlices(), collected from open-source projects. You can go to the original project or source file by following the link above each example, or browse the other available functions and classes of the tensorflow module.
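Before the examples, here is a minimal, self-contained sketch of what a tf.IndexedSlices is: a sparse representation of a larger tensor in which only a few rows are populated, typically produced as the gradient of an embedding lookup. The values, indices, and shapes below are illustrative only; the densification via tf.convert_to_tensor is the same behavior Example #10 below relies on.

import tensorflow as tf

# Rows 0 and 2 of a [4, 3] tensor are present; the remaining rows are implicitly zero.
slices = tf.IndexedSlices(
    values=tf.constant([[1., 2., 3.],
                        [4., 5., 6.]]),   # the populated rows
    indices=tf.constant([0, 2]),          # which rows of the dense tensor they fill
    dense_shape=tf.constant([4, 3]))      # shape of the full (dense) tensor

# Densify: scatters the rows into a zero tensor of shape dense_shape.
dense = tf.convert_to_tensor(slices)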
Example #1
Source File: graph_builder.py From DOTA_models with Apache License 2.0 | 9 votes |
def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the gradients
    of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    if grad is not None and self.hyperparams.gradient_clip_norm > 0:
        logging.info('Clipping gradient %s', grad)
        if isinstance(grad, tf.IndexedSlices):
            tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
            return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
        else:
            return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
    else:
        return grad
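A hedged sketch of how a method like this is typically called from inside the same builder class, assuming a TF 1.x graph; the optimizer and cost tensor are hypothetical names, not part of the example above.

# Hypothetical call site: clip every gradient, sparse or dense, before applying.
grads_and_vars = optimizer.compute_gradients(cost)
clipped = [(self._clip_gradients(g), v) for g, v in grads_and_vars]
train_op = optimizer.apply_gradients(clipped)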
Example #2
Source File: model_deploy.py From CBAM-tensorflow-slim with MIT License | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
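A hedged usage sketch for the helper above, assuming a TF 1.x graph in which a `loss` tensor and the trainable variables already exist; the optimizer choice is illustrative only.

import tensorflow as tf

# Hypothetical call site: embedding variables typically receive tf.IndexedSlices
# gradients here, and the helper histograms their .values component.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
summaries = _add_gradients_summaries(grads_and_vars)
train_op = optimizer.apply_gradients(grads_and_vars)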
Example #3
Source File: optimizers.py From THUMT with BSD 3-Clause "New" or "Revised" License | 6 votes |
def compute_gradients(self, loss, var_list=None,
                      gate_gradients=tf.train.Optimizer.GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False, grad_loss=None):
    grads_and_vars = self._optimizer.compute_gradients(
        loss * self._scale, var_list, gate_gradients, aggregation_method,
        colocate_gradients_with_ops, grad_loss)

    scaled_grads_and_vars = []

    for grad, var in grads_and_vars:
        if isinstance(grad, tf.IndexedSlices):
            grad = tf.IndexedSlices(grad.values / self._scale,
                                    grad.indices, grad.dense_shape)
        elif isinstance(grad, tf.Tensor):
            grad = grad / self._scale

        scaled_grads_and_vars.append((grad, var))

    return scaled_grads_and_vars
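The same loss-scaling idea can be sketched without the optimizer wrapper. This is a minimal illustration, assuming a TF 1.x graph with a `loss` tensor already defined; the constant scale factor and optimizer are illustrative.

import tensorflow as tf

scale = 128.0  # illustrative constant loss scale
opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss * scale)

unscaled = []
for g, v in grads_and_vars:
    if isinstance(g, tf.IndexedSlices):
        # Undo the scaling on the sparse values while keeping the sparse structure.
        g = tf.IndexedSlices(g.values / scale, g.indices, g.dense_shape)
    elif g is not None:
        g = g / scale
    unscaled.append((g, v))

train_op = opt.apply_gradients(unscaled)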
Example #4
Source File: model_deploy.py From CVTron with Apache License 2.0 | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #5
Source File: model_utils.py From nucleus7 with Mozilla Public License 2.0 | 6 votes |
def add_histogram_summary(summary_name: str, value: tf.Tensor):
    """
    Add histogram summary and also replace NaN in the value if needed on runtime

    Parameters
    ----------
    summary_name
        name of the summary
    value
        histogram value to add to summary with name
    """
    if isinstance(value, tf.IndexedSlices):
        tf.summary.histogram(
            summary_name, tf_ops.replace_nan_with_zeros(value.values))
    else:
        tf.summary.histogram(summary_name,
                             tf_ops.replace_nan_with_zeros(value))
Example #6
Source File: model_deploy.py From ctw-baseline with MIT License | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #7
Source File: model_deploy.py From STORK with MIT License | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #8
Source File: optimizers.py From THUMT with BSD 3-Clause "New" or "Revised" License | 6 votes |
def compute_gradients(self, loss, var_list=None,
                      gate_gradients=tf.train.Optimizer.GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False, grad_loss=None):
    scale_var = self._create_non_slot_variable(
        initial_value=self._scale, name="scale", colocate_with=loss)
    grads_and_vars = self._optimizer.compute_gradients(
        loss * scale_var, var_list, gate_gradients, aggregation_method,
        colocate_gradients_with_ops, grad_loss)

    scaled_grads_and_vars = []

    for grad, var in grads_and_vars:
        if isinstance(grad, tf.IndexedSlices):
            grad = tf.IndexedSlices(grad.values / scale_var,
                                    grad.indices, grad.dense_shape)
        elif isinstance(grad, tf.Tensor):
            grad = grad / scale_var

        scaled_grads_and_vars.append((grad, var))

    return scaled_grads_and_vars
Example #9
Source File: model_deploy.py From morph-net with Apache License 2.0 | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #10
Source File: neumf_model.py From models with Apache License 2.0 | 6 votes |
def sparse_to_dense_grads(grads_and_vars):
    """Convert sparse gradients to dense gradients.

    All sparse gradients, which are represented as instances of
    tf.IndexedSlices, are converted to dense Tensors. Dense gradients, which
    are represented as Tensors, are unchanged.

    The purpose of this conversion is that for small embeddings, which are
    used by this model, applying dense gradients with the AdamOptimizer is
    faster than applying sparse gradients.

    Args:
      grads_and_vars: A list of (gradient, variable) tuples. Each gradient can
        be a Tensor or an IndexedSlices. Tensors are unchanged, and
        IndexedSlices are converted to dense Tensors.

    Returns:
      The same list of (gradient, variable) as `grads_and_vars`, except each
      IndexedSlices gradient is converted to a Tensor.
    """
    # Calling convert_to_tensor changes IndexedSlices into Tensors, and leaves
    # Tensors unchanged.
    return [(tf.convert_to_tensor(g), v) for g, v in grads_and_vars]
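A hedged usage sketch: in a TF 1.x graph, the gradient of an embedding lookup (tf.gather on a variable) comes back as a tf.IndexedSlices, which the helper above densifies before the optimizer applies it. The table size, ids, and learning rate are illustrative only.

import tensorflow as tf

embedding = tf.Variable(tf.random_normal([100, 8]))    # hypothetical embedding table
ids = tf.constant([3, 7, 7])
loss = tf.reduce_sum(tf.gather(embedding, ids))

optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(loss)      # the gradient is an IndexedSlices
train_op = optimizer.apply_gradients(sparse_to_dense_grads(grads_and_vars))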
Example #11
Source File: model_deploy.py From edafa with MIT License | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #12
Source File: novograd_test.py From addons with Apache License 2.0 | 6 votes |
def run_sparse_sample(iterations, expected, optimizer):
    var_0 = tf.Variable([1.0, 2.0])
    var_1 = tf.Variable([3.0, 4.0])

    grad_0 = tf.IndexedSlices(
        tf.constant([0.1, 0.2]), tf.constant([0, 1]), tf.constant([2])
    )
    grad_1 = tf.IndexedSlices(
        tf.constant([0.3, 0.4]), tf.constant([0, 1]), tf.constant([2])
    )

    grads_and_vars = list(zip([grad_0, grad_1], [var_0, var_1]))

    for _ in range(iterations):
        optimizer.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var_0.read_value(), expected[0], atol=2e-4)
    np.testing.assert_allclose(var_1.read_value(), expected[1], atol=2e-4)
Example #13
Source File: lookahead_test.py From addons with Apache License 2.0 | 6 votes |
def run_sparse_sample(iterations, optimizer, seed=0x2019):
    np.random.seed(seed)
    tf.random.set_seed(seed)

    val_0 = np.random.random((2,))
    val_1 = np.random.random((2,))

    var_0 = tf.Variable(val_0, dtype=tf.dtypes.float32)
    var_1 = tf.Variable(val_1, dtype=tf.dtypes.float32)

    grad_0 = tf.IndexedSlices(
        tf.constant([np.random.standard_normal()]),
        tf.constant([0]),
        tf.constant([2]),
    )
    grad_1 = tf.IndexedSlices(
        tf.constant([np.random.standard_normal()]),
        tf.constant([1]),
        tf.constant([2]),
    )

    grads_and_vars = list(zip([grad_0, grad_1], [var_0, var_1]))

    for _ in range(iterations):
        optimizer.apply_gradients(grads_and_vars)

    return [val_0, val_1], [var_0, var_1]
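A hedged usage sketch for the test helper above, assuming TensorFlow 2.x eager execution with the tensorflow_addons package installed; the wrapped optimizer and its hyperparameters are illustrative only.

import tensorflow as tf
import tensorflow_addons as tfa

base = tf.keras.optimizers.SGD(learning_rate=0.1)
lookahead = tfa.optimizers.Lookahead(base, sync_period=6, slow_step_size=0.5)

# Run ten sparse updates; the helper returns the initial numpy values and the
# updated variables so a test can compare them.
initial_vals, updated_vars = run_sparse_sample(10, lookahead)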
Example #14
Source File: linear_operator.py From kfac with Apache License 2.0 | 6 votes |
def matmul_right(self, x, adjoint=False, adjoint_arg=False, name="matmul"):
    # pylint: disable=missing-docstring
    with self._name_scope(name):
        if isinstance(x, tf.IndexedSlices):
            return self._matmul_right_sparse(
                x, adjoint=adjoint, adjoint_arg=adjoint_arg)

        x = tf.convert_to_tensor(x, name="x")
        self._check_input_dtype(x)

        self_dim = -1 if adjoint else -2
        arg_dim = -2 if adjoint_arg else -1
        self.shape[self_dim].assert_is_compatible_with(x.get_shape()[arg_dim])

        return self._matmul_right(x, adjoint=adjoint, adjoint_arg=adjoint_arg)
Example #15
Source File: utils.py From kfac with Apache License 2.0 | 6 votes |
def layer_params_to_mat2d(vector):
    """Converts a vector shaped like layer parameters to a 2D matrix.

    In particular, we reshape the weights/filter component of the vector to be
    2D, flattening all leading (input) dimensions. If there is a bias component,
    we concatenate it to the reshaped weights/filter component.

    Args:
      vector: A Tensor or pair of Tensors shaped like layer parameters.

    Returns:
      A 2D Tensor with the same coefficients and the same output dimension.
    """
    if isinstance(vector, (tuple, list)):
        w_part, b_part = vector
        w_part_reshaped = tf.reshape(w_part,
                                     [-1, w_part.shape.as_list()[-1]])
        return tf.concat((w_part_reshaped, tf.reshape(b_part, [1, -1])),
                         axis=0)
    elif isinstance(vector, tf.IndexedSlices):
        return vector
    else:  # Tensor or Tensor-like.
        return tf.reshape(vector, [-1, vector.shape.as_list()[-1]])
Example #16
Source File: model_deploy.py From DOTA_models with Apache License 2.0 | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #17
Source File: utils.py From kfac with Apache License 2.0 | 6 votes |
def matmul_diag_sparse(A_diag, B, name=None):  # pylint: disable=invalid-name
    """Computes matmul(A, B) where A is a diagonal matrix, B is sparse.

    Args:
      A_diag: diagonal entries of matrix A of shape [m, m].
      B: tf.IndexedSlices. Represents matrix of shape [m, n].
      name: str. Name of op.

    Returns:
      tf.IndexedSlices resulting from matmul(A, B).

    Raises:
      ValueError: If A_diag is not rank-1.
      ValueError: If B doesn't represent a matrix.
    """
    with tf.name_scope(name, "matmul_diag_sparse", [A_diag, B]):
        A_diag = tf.convert_to_tensor(A_diag)
        if A_diag.shape.ndims != 1:
            raise ValueError("A_diag must be a rank-1 Tensor.")
        if B.indices.shape.ndims != 1 or B.values.shape.ndims != 2:
            raise ValueError("B must represent a matrix. Found: %s." % B)
        a = tf.gather(A_diag, B.indices)
        a = tf.reshape(a, list(a.shape) + [1] * (B.values.shape.ndims - 1))
        return tf.IndexedSlices(a * B.values, B.indices,
                                dense_shape=B.dense_shape)
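A hedged usage sketch for the function above, assuming a TF 1.x graph (the three-argument tf.name_scope signature it uses is the TF 1.x one); the shapes and values are illustrative only.

import tensorflow as tf

# A is a 4x4 diagonal matrix given by its diagonal; B is a sparse 4x3 matrix
# with only rows 0 and 2 present.
A_diag = tf.constant([1., 2., 3., 4.])
B = tf.IndexedSlices(
    values=tf.constant([[1., 1., 1.],
                        [2., 2., 2.]]),
    indices=tf.constant([0, 2]),
    dense_shape=tf.constant([4, 3]))

# The result is again an IndexedSlices: row 0 scaled by 1.0, row 2 scaled by 3.0.
AB = matmul_diag_sparse(A_diag, B)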
Example #18
Source File: model_deploy.py From garbage-object-detection-tensorflow with MIT License | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #19
Source File: model_deploy.py From yolo_v2 with Apache License 2.0 | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #20
Source File: graph_builder.py From yolo_v2 with Apache License 2.0 | 6 votes |
def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the gradients
    of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    if grad is not None and self.hyperparams.gradient_clip_norm > 0:
        logging.info('Clipping gradient %s', grad)
        if isinstance(grad, tf.IndexedSlices):
            tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
            return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
        else:
            return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
    else:
        return grad
Example #21
Source File: problem_generator.py From yolo_v2 with Apache License 2.0 | 6 votes |
def gradients(self, objective, parameters):
    """Compute gradients of the objective with respect to the parameters.

    Args:
      objective: The objective op (e.g. output of self.objective())
      parameters: A list of tensors (the parameters to optimize)

    Returns:
      A list of tensors representing the gradient for each parameter,
      returned in the same order as the given list.
    """
    grads = tf.gradients(objective, list(parameters))
    noisy_grads = []

    for grad in grads:
        if isinstance(grad, tf.IndexedSlices):
            noise = self.noise_stdev * tf.random_normal(tf.shape(grad.values))
            new_grad = tf.IndexedSlices(grad.values + noise, grad.indices)
        else:
            new_grad = grad + self.noise_stdev * tf.random_normal(grad.get_shape())
        noisy_grads.append(new_grad)

    return noisy_grads
Example #22
Source File: training.py From ELMo_Chin with Apache License 2.0 | 6 votes |
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.

    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
        dimension of `values` (as in an IndexedSlices object).

    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices`
      is a de-duplicated version of `indices` and `summed_values` contains
      the sum of `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
        values, new_index_positions, tf.shape(unique_indices)[0])
    return (summed_values, unique_indices)
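A hedged sketch of what the helper above computes, assuming a TF 1.x graph (tf.unsorted_segment_sum is the TF 1.x name); the numbers are illustrative only.

import tensorflow as tf

values = tf.constant([[1., 1.],
                      [2., 2.],
                      [3., 3.]])
indices = tf.constant([0, 0, 2])

# Index 0 appears twice, so its two rows are summed:
#   summed_values  -> [[3., 3.], [3., 3.]]
#   unique_indices -> [0, 2]
summed_values, unique_indices = _deduplicate_indexed_slices(values, indices)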
Example #23
Source File: training.py From ELMo_Chin with Apache License 2.0 | 6 votes |
def clip_by_global_norm_summary(t_list, clip_norm, norm_name, variables):
    # wrapper around tf.clip_by_global_norm that also does summary ops of norms

    # compute norms
    # use global_norm with one element to handle IndexedSlices vs dense
    norms = [tf.global_norm([t]) for t in t_list]

    # summary ops before clipping
    summary_ops = []
    for ns, v in zip(norms, variables):
        name = 'norm_pre_clip/' + v.name.replace(":", "_")
        summary_ops.append(tf.summary.scalar(name, ns))

    # clip
    clipped_t_list, tf_norm = tf.clip_by_global_norm(t_list, clip_norm)

    # summary ops after clipping
    norms_post = [tf.global_norm([t]) for t in clipped_t_list]
    for ns, v in zip(norms_post, variables):
        name = 'norm_post_clip/' + v.name.replace(":", "_")
        summary_ops.append(tf.summary.scalar(name, ns))

    summary_ops.append(tf.summary.scalar(norm_name, tf_norm))

    return clipped_t_list, tf_norm, summary_ops
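A hedged usage sketch, assuming a TF 1.x graph with a `loss` tensor already defined; the clip norm, summary name, and optimizer are illustrative only.

import tensorflow as tf

tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)   # embedding gradients arrive as IndexedSlices
clipped_grads, global_norm, summary_ops = clip_by_global_norm_summary(
    grads, 10.0, 'global_grad_norm', tvars)
train_op = tf.train.AdagradOptimizer(0.2).apply_gradients(
    list(zip(clipped_grads, tvars)))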
Example #24
Source File: batch_allreduce.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def _all_reduce_using_copy(tensors_across_devices, use_mean):
    """Does an all-reduce of a list of tensors by copying to the current device.

    The tensors are copied to the current device and then reduced.

    Args:
      tensors_across_devices: A list of tensors, each on a different device.
      use_mean: Whether to take the mean of the tensors instead of a sum.

    Returns:
      A reduced tensor on the current device.
    """
    assert tensors_across_devices
    if isinstance(tensors_across_devices[0], tf.IndexedSlices):
        reduced_tensor = gradients_impl._AggregateIndexedSlicesGradients(
            tensors_across_devices)
        if use_mean:
            val = tf.multiply(reduced_tensor.values,
                              float(1. / len(tensors_across_devices)))
            reduced_tensor = tf.IndexedSlices(val, reduced_tensor.indices,
                                              reduced_tensor.dense_shape)
    else:
        reduced_tensor = tf.add_n(tensors_across_devices)
        if use_mean:
            reduced_tensor *= 1. / len(tensors_across_devices)
    return reduced_tensor
Example #25
Source File: batch_allreduce.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def _all_reduce_using_copy(tensors_across_devices, use_mean):
    """Does an all-reduce of a list of tensors by copying to the current device.

    The tensors are copied to the current device and then reduced.

    Args:
      tensors_across_devices: A list of tensors, each on a different device.
      use_mean: Whether to take the mean of the tensors instead of a sum.

    Returns:
      A reduced tensor on the current device.
    """
    assert tensors_across_devices
    if isinstance(tensors_across_devices[0], tf.IndexedSlices):
        reduced_tensor = gradients_impl._AggregateIndexedSlicesGradients(
            tensors_across_devices)
        if use_mean:
            val = tf.multiply(reduced_tensor.values,
                              float(1. / len(tensors_across_devices)))
            reduced_tensor = tf.IndexedSlices(val, reduced_tensor.indices,
                                              reduced_tensor.dense_shape)
    else:
        reduced_tensor = tf.add_n(tensors_across_devices)
        if use_mean:
            reduced_tensor *= 1. / len(tensors_across_devices)
    return reduced_tensor
Example #26
Source File: batch_allreduce.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def _all_reduce_using_copy(tensors_across_devices, use_mean):
    """Does an all-reduce of a list of tensors by copying to the current device.

    The tensors are copied to the current device and then reduced.

    Args:
      tensors_across_devices: A list of tensors, each on a different device.
      use_mean: Whether to take the mean of the tensors instead of a sum.

    Returns:
      A reduced tensor on the current device.
    """
    assert tensors_across_devices
    if isinstance(tensors_across_devices[0], tf.IndexedSlices):
        reduced_tensor = gradients_impl._AggregateIndexedSlicesGradients(
            tensors_across_devices)
        if use_mean:
            val = tf.multiply(reduced_tensor.values,
                              float(1. / len(tensors_across_devices)))
            reduced_tensor = tf.IndexedSlices(val, reduced_tensor.indices,
                                              reduced_tensor.dense_shape)
    else:
        reduced_tensor = tf.add_n(tensors_across_devices)
        if use_mean:
            reduced_tensor *= 1. / len(tensors_across_devices)
    return reduced_tensor
Example #27
Source File: model_deploy.py From Hands-On-Machine-Learning-with-OpenCV-4 with MIT License | 6 votes |
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
      grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
      The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                                  grad_values))
            summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                                  tf.global_norm([grad_values])))
        else:
            tf.logging.info('Var %s has no gradient', var.op.name)
    return summaries
Example #28
Source File: session_context.py From parallax with Apache License 2.0 | 6 votes |
def _convert_fetch(self, fetch):
    if fetch is None:
        raise TypeError('Fetch argument %r has invalid type %r'
                        % (fetch, type(fetch)))
    elif isinstance(fetch, (list, tuple)):
        return [self._convert_fetch(f) for f in fetch]
    elif isinstance(fetch, dict):
        keys = list(fetch.keys())
        values = [self._convert_fetch(f) for f in fetch.values()]
        return dict(zip(keys, values))
    else:
        if isinstance(fetch, tf.SparseTensor):
            return [tf.SparseTensor(self._replica_dict[fetch.indices][i],
                                    self._replica_dict[fetch.values][i],
                                    self._replica_dict[fetch.dense_shape][i])
                    for i in range(self._num_replicas_per_worker)]
        elif isinstance(fetch, tf.IndexedSlices):
            return [tf.IndexedSlices(
                        self._replica_dict[fetch.values][i],
                        self._replica_dict[fetch.indices][i],
                        None if fetch.dense_shape is None
                        else self._replica_dict[fetch.dense_shape][i])
                    for i in range(self._num_replicas_per_worker)]
        else:
            return self._read_converted_names(fetch)
Example #29
Source File: utils.py From kfac with Apache License 2.0 | 6 votes |
def mat2d_to_layer_params(vector_template, mat2d):
    """Converts a canonical 2D matrix representation back to a vector.

    Args:
      vector_template: A Tensor or pair of Tensors shaped like layer parameters.
      mat2d: A 2D Tensor with the same shape as the value of
        layer_params_to_mat2d(vector_template).

    Returns:
      A Tensor or pair of Tensors with the same coefficients as mat2d and the
      same shape as vector_template.
    """
    if isinstance(vector_template, (tuple, list)):
        w_part, b_part = mat2d[:-1], mat2d[-1]
        return tf.reshape(w_part, vector_template[0].shape), b_part
    elif isinstance(vector_template, tf.IndexedSlices):
        if not isinstance(mat2d, tf.IndexedSlices):
            raise TypeError(
                "If vector_template is an IndexedSlices, so should mat2d.")
        return mat2d
    else:
        return tf.reshape(mat2d, vector_template.shape)
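A hedged sketch of the round trip between this function and layer_params_to_mat2d from Example #15, assuming a TF 1.x graph; the (weights, bias) shapes are illustrative only.

import tensorflow as tf

w = tf.ones([3, 4, 2])   # e.g. a small filter: all leading dims get flattened
b = tf.zeros([2])

mat2d = layer_params_to_mat2d((w, b))           # shape [13, 2]: 12 weight rows + 1 bias row
w_back, b_back = mat2d_to_layer_params((w, b), mat2d)
# w_back has shape [3, 4, 2] again and b_back has shape [2].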
Example #30
Source File: opt.py From finetune-transformer-lm with MIT License | 5 votes |
def adam(params, grads, lr, schedule, t_total, b1=0.9, b2=0.999, e=1e-8,
         l2=0, vector_l2=False, max_grad_norm=-1, **kwargs):
    """
    adam with weight decay fix
    """
    t = tf.Variable(0, dtype=tf.float32, trainable=False)
    tt = t + 1
    updates = [t.assign(tt)]
    if max_grad_norm > 0:
        grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    for p, g in zip(params, grads):
        if p is None or g is None:
            print("can't train", p.name, g)
        else:
            if isinstance(g, tf.IndexedSlices):
                g = tf.convert_to_tensor(g)
            m = tf.Variable(p * 0, dtype=tf.float32, trainable=False)
            v = tf.Variable(p * 0, dtype=tf.float32, trainable=False)
            lrt = lr * tf.sqrt(1 - b2 ** tt) / (1 - b1 ** tt)
            lrt *= schedule(t / t_total)
            mt = b1 * m + (1 - b1) * g
            vt = b2 * v + (1 - b2) * g * g
            if (len(p.get_shape()) > 1 or vector_l2) and l2 > 0:
                pt = p - lrt * (mt / (tf.sqrt(vt) + e) + l2 * p)
            else:
                pt = p - lrt * (mt / (tf.sqrt(vt) + e))
            updates.extend([m.assign(mt), v.assign(vt), p.assign(pt)])
    return tf.group(*updates)
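A hedged usage sketch for the function above, assuming a TF 1.x graph in which a `loss` tensor has already been built; the learning rate, schedule, and hyperparameters are illustrative only.

import tensorflow as tf

params = tf.trainable_variables()
grads = tf.gradients(loss, params)   # embedding gradients arrive as IndexedSlices
train_op = adam(params, grads,
                lr=6.25e-5,
                schedule=lambda frac: 1.0 - frac,   # linear decay, illustrative
                t_total=1000,
                max_grad_norm=1.0)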