Python tensorflow.GradientTape() Examples
The following are 30 code examples of tensorflow.GradientTape().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module tensorflow, or try the search function.
Example #1
Source File: classifier.py From ashpy with Apache License 2.0 | 6 votes |
def train_step(self, features, labels):
    """
    Train step.

    Args:
        features: Input features.
        labels: The labels.

    Returns:
        Loss value.
    """
    with tf.GradientTape() as tape:
        loss = self._loss(
            self._context, features=features, labels=labels, training=True
        )
    grads = tape.gradient(loss, self._model.trainable_variables)
    self._optimizer.apply_gradients(zip(grads, self._model.trainable_variables))
    return loss
Example #2
Source File: train.py From graphics with Apache License 2.0 | 6 votes |
def wrapped_tf_function(points, label):
    """Performs one step of minimization of the loss."""
    # --- subsampling (order DOES matter)
    points = points[0:FLAGS.num_points, ...]
    # --- augmentation
    if FLAGS.augment:
        points = tf.map_fn(augment.rotate, points)
        points = augment.jitter(points)
    # --- training
    with tf.GradientTape() as tape:
        logits = model(points, training=True)
        loss = model.loss(label, logits)
    trainable = model.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return loss
Example #3
Source File: net_work.py From face_landmark with Apache License 2.0 | 6 votes |
def train_step(self, inputs):
    """One train step.

    Args:
        inputs: one batch input.

    Returns:
        loss: Scaled loss.
    """
    image, label = inputs
    with tf.GradientTape() as tape:
        predictions = self.model(image, training=True)
        loss = self.compute_loss(predictions, label, training=True)
    grads = tape.gradient(loss, self.model.trainable_variables)
    # Clip each gradient element to [-5, 5] to stabilize training.
    grads = [tf.clip_by_value(g, -5.0, 5.0) for g in grads]
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
    return loss
Example #4
Source File: training.py From nlp-journey with Apache License 2.0 | 6 votes |
def train_step(self, inp, tar):
    """One transformer training step with teacher forcing."""
    # Decoder input is the target shifted right; the expected output
    # is the target shifted left.
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    enc_padding_mask, combined_mask, dec_padding_mask = (
        self.mask_encoder.create_masks(inp, tar_inp)
    )
    with tf.GradientTape() as tape:
        predictions, _ = self.transformer(
            inp, tar_inp, True, enc_padding_mask, combined_mask, dec_padding_mask
        )
        loss = self.loss_function(tar_real, predictions)
    grads = tape.gradient(loss, self.transformer.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.transformer.trainable_variables))
    self.train_loss(loss)
    self.train_accuracy(tar_real, predictions)
Example #5
Source File: gaifo.py From tf2rl with MIT License | 6 votes |
def _train_body(self, agent_states, agent_next_states, expert_states, expert_next_states):
    """One discriminator update on (state, next_state) transition pairs."""
    eps = 1e-8  # avoids log(0)
    with tf.device(self.device):
        with tf.GradientTape() as tape:
            real_logits = self.disc([expert_states, expert_next_states])
            fake_logits = self.disc([agent_states, agent_next_states])
            loss = -(tf.reduce_mean(tf.math.log(real_logits + eps)) +
                     tf.reduce_mean(tf.math.log(1. - fake_logits + eps)))
        grads = tape.gradient(loss, self.disc.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.disc.trainable_variables))
        # Average of real/fake classification accuracy.
        accuracy = (
            tf.reduce_mean(tf.cast(real_logits >= 0.5, tf.float32)) / 2. +
            tf.reduce_mean(tf.cast(fake_logits < 0.5, tf.float32)) / 2.)
        js_divergence = self._compute_js_divergence(fake_logits, real_logits)
    return loss, accuracy, js_divergence
Example #6
Source File: dqn.py From tf2rl with MIT License | 6 votes |
def _train_body(self, states, actions, next_states, rewards, done, weights):
    """Compute TD errors and apply one Q-network update."""
    with tf.device(self.device):
        with tf.GradientTape() as tape:
            if self._enable_categorical_dqn:
                td_errors = self._compute_td_error_body_distributional(
                    states, actions, next_states, rewards, done)
                q_func_loss = tf.reduce_mean(
                    huber_loss(tf.negative(td_errors),
                               delta=self.max_grad) * weights)
            else:
                td_errors = self._compute_td_error_body(
                    states, actions, next_states, rewards, done)
                q_func_loss = tf.reduce_mean(
                    huber_loss(td_errors, delta=self.max_grad) * weights)
        grads = tape.gradient(q_func_loss, self.q_func.trainable_variables)
        self.q_func_optimizer.apply_gradients(
            zip(grads, self.q_func.trainable_variables))
    return td_errors, q_func_loss
Example #7
Source File: hardshrink_test.py From addons with Apache License 2.0 | 6 votes |
def verify_funcs_are_equivalent(dtype):
    """Check that the custom-op and pure-python hardshrink agree.

    Compares the forward values and the gradients of the native
    implementation against the python reference on random inputs.

    Args:
        dtype: Numpy dtype used to generate the test input.
    """
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)
    lower = np.random.uniform(-10, 10)
    upper = lower + np.random.uniform(0, 10)
    # persistent=True because the tape is queried twice (once per impl).
    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = _hardshrink_custom_op(x, lower, upper)
        y_py = _hardshrink_py(x, lower, upper)
    test_utils.assert_allclose_according_to_type(y_native, y_py)
    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)
    # Persistent tapes hold resources until garbage-collected; release
    # explicitly once all gradients have been taken (per tf.GradientTape docs).
    del t
    test_utils.assert_allclose_according_to_type(grad_native, grad_py)
Example #8
Source File: gail.py From tf2rl with MIT License | 6 votes |
def _train_body(self, agent_states, agent_acts, expert_states, expert_acts):
    """One GAIL discriminator update on (state, action) pairs."""
    eps = 1e-8  # avoids log(0)
    with tf.device(self.device):
        with tf.GradientTape() as tape:
            real_logits = self.disc([expert_states, expert_acts])
            fake_logits = self.disc([agent_states, agent_acts])
            loss = -(tf.reduce_mean(tf.math.log(real_logits + eps)) +
                     tf.reduce_mean(tf.math.log(1. - fake_logits + eps)))
        grads = tape.gradient(loss, self.disc.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.disc.trainable_variables))
        # Average of real/fake classification accuracy.
        accuracy = (
            tf.reduce_mean(tf.cast(real_logits >= 0.5, tf.float32)) / 2. +
            tf.reduce_mean(tf.cast(fake_logits < 0.5, tf.float32)) / 2.)
        js_divergence = self._compute_js_divergence(fake_logits, real_logits)
    return loss, accuracy, js_divergence
Example #9
Source File: softshrink_test.py From addons with Apache License 2.0 | 6 votes |
def verify_funcs_are_equivalent(dtype):
    """Check that the softshrink op and the python reference agree.

    Compares the forward values and the gradients of the native
    implementation against the python reference on random inputs.

    Args:
        dtype: Numpy dtype used to generate the test input.
    """
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)
    lower = np.random.uniform(-10, 10)
    upper = lower + np.random.uniform(0, 10)
    # persistent=True because the tape is queried twice (once per impl).
    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = softshrink(x, lower, upper)
        y_py = _softshrink_py(x, lower, upper)
    test_utils.assert_allclose_according_to_type(y_native, y_py)
    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)
    # Persistent tapes hold resources until garbage-collected; release
    # explicitly once all gradients have been taken (per tf.GradientTape docs).
    del t
    test_utils.assert_allclose_according_to_type(grad_native, grad_py)
Example #10
Source File: interpolate_spline_test.py From addons with Apache License 2.0 | 6 votes |
def test_interpolation_gradient():
    """Correctness of gradients is assumed. We compute them and check they exist."""
    problem = _QuadraticPlusSinProblemND()
    query_points, _, train_points, train_values = problem.get_problem(
        optimizable=True)
    regularization = 0.001
    for order in (1, 2, 3, 4):
        with tf.GradientTape() as g:
            interpolator = interpolate_spline(
                train_points,
                train_values,
                query_points,
                order,
                regularization,
            )
        gradients = g.gradient(interpolator, train_points).numpy()
        assert np.sum(np.abs(gradients)) != 0
Example #11
Source File: dense_image_warp_test.py From addons with Apache License 2.0 | 6 votes |
def test_gradients_exist():
    """Check that backprop can run.

    The correctness of the gradients is assumed, since the forward
    propagation is tested to be correct and we only use built-in tf ops.
    However, we perform a simple test to make sure that backprop can
    actually run.
    """
    batch_size, height, width, num_channels = 4, 5, 6, 7
    image = tf.random.normal([batch_size, height, width, num_channels])
    flows = tf.Variable(
        tf.random.normal(shape=[batch_size, height, width, 2]) * 0.25,
        dtype=tf.float32,
    )
    with tf.GradientTape() as t:
        interp = dense_image_warp(image, flows)
    grads = t.gradient(interp, flows).numpy()
    assert np.sum(np.abs(grads)) != 0
Example #12
Source File: keras_model.py From deepchem with MIT License | 6 votes |
def _create_gradient_fn(self, variables):
    """Create a function that computes gradients and applies them to the model.

    Because of the way TensorFlow function tracing works, we need to create a
    separate function for each new set of variables.

    Args:
        variables: the variables to train, or None to train all of the
            model's trainable variables.

    Returns:
        A tf.function that runs one gradient step on a batch and returns
        the batch loss.
    """

    @tf.function(experimental_relax_shapes=True)
    def apply_gradient_for_batch(inputs, labels, weights, loss):
        with tf.GradientTape() as tape:
            outputs = self.model(inputs, training=True)
            if isinstance(outputs, tf.Tensor):
                outputs = [outputs]
            if self._loss_outputs is not None:
                outputs = [outputs[i] for i in self._loss_outputs]
            batch_loss = loss(outputs, labels, weights)
        # Renamed from `vars` to avoid shadowing the builtin.
        if variables is None:
            train_vars = self.model.trainable_variables
        else:
            train_vars = variables
        grads = tape.gradient(batch_loss, train_vars)
        self._tf_optimizer.apply_gradients(zip(grads, train_vars))
        self._global_step.assign_add(1)
        return batch_loss

    return apply_gradient_for_batch
Example #13
Source File: gan.py From deepchem with MIT License | 6 votes |
def call(self, inputs, conditional_inputs):
    # Gradient-penalty layer (WGAN-GP style): returns the discriminator
    # output together with a penalty on the gradient norm w.r.t. inputs.
    with tf.GradientTape() as tape:
        for layer in inputs:
            tape.watch(layer)
        output = self.discriminator(_list_or_tensor(inputs + conditional_inputs))
    gradients = tape.gradient(output, inputs)
    # Some inputs may be disconnected from the output; drop their None grads.
    gradients = [g for g in gradients if g is not None]
    if len(gradients) > 0:
        norm2 = 0.0
        for g in gradients:
            g2 = tf.square(g)
            dims = len(g.shape)
            if dims > 1:
                # Sum over all non-batch axes so norm2 is per-sample.
                g2 = tf.reduce_sum(g2, axis=list(range(1, dims)))
            norm2 += g2
        # Penalize deviation of the gradient norm from 1, scaled by the
        # GAN's gradient_penalty coefficient.
        penalty = tf.square(tf.sqrt(norm2) - 1.0)
        penalty = self.gan.gradient_penalty * tf.reduce_mean(penalty)
    else:
        penalty = 0.0
    return [output, penalty]
Example #14
Source File: maml.py From deepchem with MIT License | 6 votes |
def train_on_current_task(self, optimization_steps=1, restore=True):
    """Perform a few steps of gradient descent to fine tune the model on the current task.

    Parameters
    ----------
    optimization_steps: int
        the number of steps of gradient descent to perform
    restore: bool
        if True, restore the model from the most recent checkpoint before optimizing
    """
    if restore:
        self.restore()
    variables = self.learner.variables
    for _ in range(optimization_steps):
        batch = self.learner.get_batch()
        with tf.GradientTape() as tape:
            loss, _ = self.learner.compute_model(batch, variables, True)
        grads = tape.gradient(loss, variables)
        self._tf_task_optimizer.apply_gradients(zip(grads, variables))
Example #15
Source File: maml.py From deepchem with MIT License | 6 votes |
def _compute_meta_loss(self, inputs, inputs2, variables):
    """This is called during fitting to compute the meta-loss (the loss after a
    few steps of optimization), and its gradient.
    """
    updated_variables = variables
    # Outer tape records the whole inner-loop adaptation so the meta-gradient
    # can flow back to the original `variables`.
    with tf.GradientTape() as meta_tape:
        for k in range(self.optimization_steps):
            with tf.GradientTape() as tape:
                loss, _ = self.learner.compute_model(inputs, updated_variables, True)
            gradients = tape.gradient(loss, updated_variables)
            # Manual SGD step; variables with no gradient are left unchanged.
            updated_variables = [
                v if g is None else v - self.learning_rate * g
                for v, g in zip(updated_variables, gradients)
            ]
        # Meta-loss: performance of the adapted parameters on held-out inputs2.
        meta_loss, _ = self.learner.compute_model(inputs2, updated_variables, True)
    meta_gradients = meta_tape.gradient(meta_loss, variables)
    return meta_loss, meta_gradients
Example #16
Source File: quantizers_test.py From larq with Apache License 2.0 | 6 votes |
def test_swish_grad(self):
    """SwishSign gradients match the analytic formula for two betas."""

    def expected_grad(x, beta):
        return (
            beta * (2 - beta * x * np.tanh(beta * x / 2)) / (1 + np.cosh(beta * x))
        )

    x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
    tf_x = tf.Variable(x)
    with tf.GradientTape() as tape:
        activation = lq.quantizers.SwishSign()(tf_x)
    grad = tape.gradient(activation, tf_x)
    np.testing.assert_allclose(grad.numpy(), expected_grad(x, beta=5.0))
    with tf.GradientTape() as tape:
        activation = lq.quantizers.SwishSign(beta=10.0)(tf_x)
    grad = tape.gradient(activation, tf_x)
    np.testing.assert_allclose(grad.numpy(), expected_grad(x, beta=10.0))
Example #17
Source File: main.py From Fast-SRGAN with MIT License | 6 votes |
def pretrain_step(model, x, y):
    """
    Single step of generator pre-training.
    Args:
        model: A model object with a tf keras compiled generator.
        x: The low resolution image tensor.
        y: The high resolution image tensor.
    """
    with tf.GradientTape() as tape:
        sr_output = model.generator(x)
        mse = tf.keras.losses.MeanSquaredError()(y, sr_output)
    grads = tape.gradient(mse, model.generator.trainable_variables)
    model.gen_optimizer.apply_gradients(
        zip(grads, model.generator.trainable_variables))
    return mse
Example #18
Source File: sparse_image_warp_test.py From addons with Apache License 2.0 | 5 votes |
def test_that_backprop_runs():
    """Making sure the gradients can be computed."""
    batch_size, image_height, image_width = 1, 9, 12
    image = tf.Variable(
        np.random.uniform(size=[batch_size, image_height, image_width, 3]),
        dtype=tf.float32,
    )
    locations = tf.constant(np.float32(np.expand_dims([[3.0, 3.0]], 0)))
    displacements = tf.constant(np.float32(np.expand_dims([[0.25, -0.5]], 0)))
    with tf.GradientTape() as t:
        warped_image, _ = sparse_image_warp(
            image,
            locations,
            locations + displacements,
            num_boundary_points=3,
        )
    gradients = t.gradient(warped_image, image).numpy()
    assert np.sum(np.abs(gradients)) != 0
Example #19
Source File: weight_decay_optimizers.py From addons with Apache License 2.0 | 5 votes |
def minimize(self, loss, var_list, grad_loss=None, name=None, decay_var_list=None):
    """Minimize `loss` by updating `var_list`.

    This method simply computes gradient using `tf.GradientTape` and calls
    `apply_gradients()`. If you want to process the gradient before applying
    then call `tf.GradientTape` and `apply_gradients()` explicitly instead
    of using this function.

    Args:
        loss: A callable taking no arguments which returns the value to
            minimize.
        var_list: list or tuple of `Variable` objects to update to minimize
            `loss`, or a callable returning the list or tuple of `Variable`
            objects. Use callable when the variable list would otherwise be
            incomplete before `minimize` since the variables are created at
            the first time `loss` is called.
        grad_loss: Optional. A `Tensor` holding the gradient computed for
            `loss`.
        decay_var_list: Optional list of variables to be decayed. Defaults
            to all variables in var_list.
        name: Optional name for the returned operation.
    Returns:
        An Operation that updates the variables in `var_list`.
    Raises:
        ValueError: If some of the variables are not `Variable` objects.
    """
    # Set comprehension instead of set([...]) (flake8-comprehensions C403).
    self._decay_var_list = (
        {_ref(v) for v in decay_var_list} if decay_var_list else False
    )
    return super().minimize(loss, var_list=var_list, grad_loss=grad_loss, name=name)
Example #20
Source File: quantizers_test.py From larq with Apache License 2.0 | 5 votes |
def test_identity_ste_grad(self, fn):
    """The straight-through estimator gradient of `fn` is all ones."""
    x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
    tf_x = tf.Variable(x)
    with tf.GradientTape() as tape:
        out = fn(tf_x)
    grad = tape.gradient(out, tf_x)
    np.testing.assert_allclose(grad.numpy(), np.ones_like(x))
Example #21
Source File: levenberg_marquardt.py From graphics with Apache License 2.0 | 5 votes |
def _values_and_jacobian(residuals, variables):
  """Computes the residual values and the Jacobian matrix.

  Args:
    residuals: A list of residuals.
    variables: A list of variables.

  Returns:
    The residual values and the Jacobian matrix.
  """

  def _compute_residual_values(residuals, variables):
    """Computes the residual values."""
    # Flatten each residual and concatenate into one 1-D vector.
    return tf.concat([
        tf.reshape(residual(*variables), shape=(-1,)) for residual in residuals
    ], axis=-1)

  def _compute_jacobian(values, variables, tape):
    """Computes the Jacobian matrix."""
    # ZERO for unconnected gradients keeps the Jacobian dense.
    jacobians = tape.jacobian(
        values, variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    # Flatten each per-variable Jacobian and concatenate column-wise.
    return tf.concat([
        tf.reshape(jacobian, shape=(tf.shape(input=jacobian)[0], -1))
        for jacobian in jacobians
    ], axis=-1)

  # Persistent tape: jacobian() is queried after the forward pass, and the
  # variables are watched explicitly instead of relying on auto-watching.
  with tf.GradientTape(watch_accessed_variables=False, persistent=True) as tape:
    for variable in variables:
      tape.watch(variable)
    values = _compute_residual_values(residuals, variables)
  jacobian = _compute_jacobian(values, variables, tape)
  # Free the persistent tape's resources once the Jacobian is computed.
  del tape
  values = tf.expand_dims(values, axis=-1)
  return values, jacobian
Example #22
Source File: train_model_util_TensorFlow.py From Awesome-RecSystem-Models with MIT License | 5 votes |
def train_one_step(model, optimizer, idx, value, label):
    """Run one optimization step.

    Computes cross-entropy loss plus L2 weight regularization, clips the
    gradients by norm, and applies them.

    Args:
        model: the model to train; must expose `reg_l2` and
            `trainable_variables`.
        optimizer: a tf.keras optimizer.
        idx: feature-index input batch.
        value: feature-value input batch.
        label: ground-truth labels.

    Returns:
        The scalar training loss for this batch.
    """
    with tf.GradientTape() as tape:
        output = model(idx, value)
        loss = cross_entropy_loss(y_true=label, y_pred=output)
        # L2 regularization over all trainable parameters (comprehension
        # instead of the manual append loop).
        reg_loss = tf.reduce_sum(
            tf.stack([tf.nn.l2_loss(p) for p in model.trainable_variables]))
        loss = loss + model.reg_l2 * reg_loss
    grads = tape.gradient(loss, model.trainable_variables)
    # Clip gradients by norm to stabilize training.
    grads = [tf.clip_by_norm(g, 100) for g in grads]
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
    return loss
Example #23
Source File: gaussian_process_test.py From BERT with Apache License 2.0 | 5 votes |
def testSparseGaussianProcess(self):
    # Smoke test: a sparse GP layer yields finite loss, correctly shaped
    # predictions, and non-None gradients on random data.
    dataset_size = 10
    batch_size = 3
    input_dim = 4
    output_dim = 5
    # NOTE(review): tf.to_float / tf.global_variables_initializer are
    # TF1-era APIs; presumably this test runs under TF1 compat — confirm.
    features = tf.to_float(np.random.rand(batch_size, input_dim))
    labels = tf.to_float(np.random.rand(batch_size, output_dim))
    model = gaussian_process.SparseGaussianProcess(output_dim, num_inducing=2)
    with tf.GradientTape() as tape:
        predictions = model(features)
        nll = -tf.reduce_mean(predictions.distribution.log_prob(labels))
        # Scale KL by dataset size (minibatch ELBO convention).
        kl = sum(model.losses) / dataset_size
        loss = nll + kl
    self.evaluate(tf.global_variables_initializer())
    grads = tape.gradient(nll, model.variables)
    for grad in grads:
        self.assertIsNotNone(grad)
    loss_val, predictions_val = self.evaluate([loss, predictions])
    self.assertEqual(loss_val.shape, ())
    self.assertGreaterEqual(loss_val, 0.)
    self.assertEqual(predictions_val.shape, (batch_size, output_dim))
Example #24
Source File: tf2_hisan.py From Projects with MIT License | 5 votes |
def _train_step(self, text, labels):
    """Single training step: forward pass, loss, gradient update."""
    with tf.GradientTape() as tape:
        preds = self.model(text, training=True)
        loss = self.loss_object(labels, preds)
    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
    return preds, loss
Example #25
Source File: blocks.py From BERT with Apache License 2.0 | 5 votes |
def backward_grads_and_vars(self, y, dy, training=True):
    """Manually compute backward gradients given input and output grads."""
    # Reversible-block backward pass: reconstruct the input x from the
    # output y, then compute grads for f, g, and the input — without
    # having stored activations from the forward pass.
    dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=self.axis)
    # Persistent tape: queried twice (for g and for f).
    with tf.GradientTape(persistent=True) as tape:
        y = tf.identity(y)
        tape.watch(y)
        y1, y2 = tf.split(y, num_or_size_splits=2, axis=self.axis)
        # Invert the coupling: x2 = y2 - g(y1), x1 = y1 - f(x2).
        z1 = y1
        gz1 = self.g(z1, training=training)
        x2 = y2 - gz1
        fx2 = self.f(x2, training=training)
        x1 = z1 - fx2
    grads_combined = tape.gradient(
        gz1, [z1] + self.g.trainable_variables, output_gradients=dy2)
    dz1 = dy1 + grads_combined[0]
    dg = grads_combined[1:]
    dx1 = dz1
    grads_combined = tape.gradient(
        fx2, [x2] + self.f.trainable_variables, output_gradients=dz1)
    dx2 = dy2 + grads_combined[0]
    df = grads_combined[1:]
    # Release the persistent tape explicitly.
    del tape
    grads = df + dg
    vars_ = self.f.trainable_variables + self.g.trainable_variables
    x = tf.concat([x1, x2], axis=self.axis)
    dx = tf.concat([dx1, dx2], axis=self.axis)
    return x, dx, grads, vars_
Example #26
Source File: quantizers_test.py From larq with Apache License 2.0 | 5 votes |
def test_ste_grad(self, fn):
    """STE gradient is 1 inside [-1, 1] and 0 outside."""

    @np.vectorize
    def expected(x):
        return 1.0 if np.abs(x) <= 1 else 0.0

    x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
    tf_x = tf.Variable(x)
    with tf.GradientTape() as tape:
        activation = fn(tf_x)
    grad = tape.gradient(activation, tf_x)
    np.testing.assert_allclose(grad.numpy(), expected(x))

# Test with and without default threshold
Example #27
Source File: quantizers_test.py From larq with Apache License 2.0 | 5 votes |
def test_approx_sign_grad(self):
    """ApproxSign gradient is 2 - 2|x| inside [-1, 1] and 0 outside."""

    @np.vectorize
    def expected(x):
        return 2 - 2 * np.abs(x) if np.abs(x) <= 1 else 0.0

    x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
    tf_x = tf.Variable(x)
    with tf.GradientTape() as tape:
        activation = lq.quantizers.ApproxSign()(tf_x)
    grad = tape.gradient(activation, tf_x)
    np.testing.assert_allclose(grad.numpy(), expected(x))
Example #28
Source File: blocks.py From BERT with Apache License 2.0 | 5 votes |
def backward_grads_and_vars(self, x, y, dy, training=True):
    """Apply reversible block backward to outputs."""
    grads_all = []
    vars_all = []
    # Walk the blocks in reverse order, propagating dy backwards.
    for i in reversed(range(len(self.blocks))):
        block = self.blocks[i]
        if i == 0:
            # First block usually contains downsampling that can't be reversed
            with tf.GradientTape() as tape:
                x = tf.identity(x)
                tape.watch(x)
                y = block(x, training=training)
            grads_combined = tape.gradient(
                y, [x] + block.trainable_variables, output_gradients=dy)
            dy = grads_combined[0]
            grads_all += grads_combined[1:]
            vars_all += block.trainable_variables
        else:
            # Reversible blocks reconstruct their input from the output.
            y, dy, grads, vars_ = block.backward_grads_and_vars(
                y, dy, training=training)
            grads_all += grads
            vars_all += vars_
    return dy, grads_all, vars_all
Example #29
Source File: blocks.py From BERT with Apache License 2.0 | 5 votes |
def backward_grads_and_vars(self, x, y, dy, training=True):
    """Apply reversible block backward to outputs."""
    # NOTE(review): byte-identical duplicate of the previous
    # backward_grads_and_vars — likely scraped twice from the same file.
    grads_all = []
    vars_all = []
    # Walk the blocks in reverse order, propagating dy backwards.
    for i in reversed(range(len(self.blocks))):
        block = self.blocks[i]
        if i == 0:
            # First block usually contains downsampling that can't be reversed
            with tf.GradientTape() as tape:
                x = tf.identity(x)
                tape.watch(x)
                y = block(x, training=training)
            grads_combined = tape.gradient(
                y, [x] + block.trainable_variables, output_gradients=dy)
            dy = grads_combined[0]
            grads_all += grads_combined[1:]
            vars_all += block.trainable_variables
        else:
            # Reversible blocks reconstruct their input from the output.
            y, dy, grads, vars_ = block.backward_grads_and_vars(
                y, dy, training=training)
            grads_all += grads
            vars_all += vars_
    return dy, grads_all, vars_all
Example #30
Source File: quantizers_test.py From larq with Apache License 2.0 | 5 votes |
def test_magnitude_aware_sign_grad(self):
    """Gradient is the per-channel mean |w| inside [-1, 1], 0 outside."""
    a = np.random.uniform(-2, 2, (3, 2, 2, 3))
    x = tf.Variable(a)
    with tf.GradientTape() as tape:
        y = lq.quantizers.MagnitudeAwareSign()(x)
    grad = tape.gradient(y, x)
    # Per-channel scale: mean absolute value over each output channel.
    scale_vector = [
        np.mean(np.reshape(np.abs(a[:, :, :, i]), [-1])) for i in range(3)
    ]
    np.testing.assert_allclose(
        grad.numpy(),
        np.where(abs(a) < 1, np.ones(a.shape) * scale_vector, 0),
    )