Python tensorflow.python.ops.state_ops.scatter_add() Examples
The following are 30 code examples of tensorflow.python.ops.state_ops.scatter_add(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out all available functions and classes of the module tensorflow.python.ops.state_ops.
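For orientation, here is a minimal, self-contained sketch (not taken from any of the projects below; the variable name and values are illustrative) of what state_ops.scatter_add computes: it adds updates into the variable at the given indices, in place, and returns the resulting value. The examples that follow are TF 1.x graph-mode optimizer code, so the sketch uses the same style.

import tensorflow.compat.v1 as tf
from tensorflow.python.ops import state_ops

tf.disable_eager_execution()  # the examples below are TF 1.x graph-mode code

var = tf.Variable([1.0, 2.0, 3.0, 4.0], name="acc")
# Adds 10 to var[0] and 20 to var[2]; the variable itself is mutated.
update = state_ops.scatter_add(var, indices=[0, 2], updates=[10.0, 20.0])

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(update))  # [11.  2. 23.  4.]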
Example #1
Source File: optimizer.py From bert-multitask-learning with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #2
Source File: weight_decay_optimizers.py From robust_audio_ae with BSD 2-Clause "Simplified" License

def _apply_sparse(self, grad, var):
  scatter_add = state_ops.scatter_add
  decay_op = self._decay_weights_sparse_op(var, grad.indices, scatter_add)
  with ops.control_dependencies([decay_op]):
    return super(DecoupledWeightDecayExtension, self)._apply_sparse(
        grad, var)
Example #3
Source File: adamW.py From Conditional_Density_Estimation with MIT License

def _decay_weights_sparse_op(self, var, indices, scatter_add):
  if not self._decay_var_list or var in self._decay_var_list:
    return scatter_add(var, indices, -self._weight_decay * var,
                       self._use_locking)
  return control_flow_ops.no_op()

# Here, we overwrite the apply functions that the base optimizer calls.
# super().apply_x resolves to the apply_x function of the BaseOptimizer.
Example #4
Source File: AMSGrad.py From AMSGrad-Tensorflow with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #5
Source File: AMSGrad.py From AMSGrad-Tensorflow with MIT License

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  # amsgrad
  vhat = self.get_slot(var, "vhat")
  vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
  v_sqrt = math_ops.sqrt(vhat_t)

  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
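Many of the examples on this page share the same sparse moment-update pattern: the dense slot is first rescaled by its decay factor with state_ops.assign, and only then are the (1 - beta)-scaled gradient rows added back with scatter_add, under a control dependency so the two steps run in order. The following stand-alone sketch (hypothetical shapes and names, not part of the AMSGrad code above) shows that pattern for a single first-moment slot.

import tensorflow.compat.v1 as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import state_ops

tf.disable_eager_execution()

beta1 = 0.9
m = tf.Variable(tf.zeros([5, 3]), name="m")      # first-moment slot
grad_values = tf.ones([2, 3])                    # gradient rows that were touched
grad_indices = tf.constant([1, 4])               # which rows of m they belong to

# Dense step: m <- beta1 * m.
m_decayed = state_ops.assign(m, m * beta1)
# Sparse step: m[grad_indices] += (1 - beta1) * grad_values, after the decay.
with ops.control_dependencies([m_decayed]):
  m_t = state_ops.scatter_add(m, grad_indices, grad_values * (1 - beta1))

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(m_t))  # rows 1 and 4 become 0.1 everywhere, other rows stay 0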
Example #6
Source File: lamb_optimizer_v1.py From training with Apache License 2.0

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #7
Source File: weight_decay_optimizers.py From robust_audio_ae with BSD 2-Clause "Simplified" License

def _decay_weights_sparse_op(self, var, indices, scatter_add):
  if not self._decay_var_list or var in self._decay_var_list:
    return scatter_add(var, indices, -self._weight_decay * var,
                       self._use_locking)
  return control_flow_ops.no_op()

# Here, we overwrite the apply functions that the base optimizer calls.
# super().apply_x resolves to the apply_x function of the BaseOptimizer.
Example #8
Source File: RAdam.py From RAdam-Tensorflow with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(
          x, i, v, use_locking=self._use_locking))
Example #9
Source File: training.py From keras-radam with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(
          x, i, v, use_locking=self._use_locking))
Example #10
Source File: optimization_gpu.py From BERT-multi-gpu with Apache License 2.0

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  learning_rate_t = math_ops.cast(
      self.learning_rate_t, var.dtype.base_dtype)
  beta_1_t = math_ops.cast(self.beta_1_t, var.dtype.base_dtype)
  beta_2_t = math_ops.cast(self.beta_2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self.epsilon_t, var.dtype.base_dtype)
  weight_decay_rate_t = math_ops.cast(
      self.weight_decay_rate_t, var.dtype.base_dtype)

  m = self.get_slot(var, 'm')
  v = self.get_slot(var, 'v')

  m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
  m_scaled_g_values = grad * (1 - beta_1_t)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
  v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  update = m_t / (math_ops.sqrt(v_t) + epsilon_t)

  if self._do_use_weight_decay(var.name):
    update += weight_decay_rate_t * var

  update_with_lr = learning_rate_t * update

  var_update = state_ops.assign_sub(var,
                                    update_with_lr,
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example #11
Source File: optimization_gpu.py From BERT-multi-gpu with Apache License 2.0

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #12
Source File: adam.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  v_sqrt = math_ops.sqrt(v_t)
  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example #13
Source File: adam.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #14
Source File: adam.py From keras-lambda with MIT License

def _apply_sparse(self, grad, var):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad.values * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  m_t = state_ops.scatter_add(m_t, grad.indices, m_scaled_g_values,
                              use_locking=self._use_locking)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad.values * grad.values) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  v_t = state_ops.scatter_add(v_t, grad.indices, v_scaled_g_values,
                              use_locking=self._use_locking)

  v_sqrt = math_ops.sqrt(v_t)
  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example #15
Source File: weight_decay_optimizers.py From robust_audio_ae with BSD 2-Clause "Simplified" License

def _resource_apply_sparse(self, grad, var, indices):
  scatter_add = self._resource_scatter_add
  decay_op = self._decay_weights_sparse_op(var, indices, scatter_add)
  with ops.control_dependencies([decay_op]):
    return super(DecoupledWeightDecayExtension, self)._resource_apply_sparse(
        grad, var, indices)
Example #16
Source File: lamb_optimizer_v1.py From training with Apache License 2.0

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #17
Source File: optimizer.py From bert-multitask-learning with MIT License

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  learning_rate_t = math_ops.cast(
      self.learning_rate_t, var.dtype.base_dtype)
  beta_1_t = math_ops.cast(self.beta_1_t, var.dtype.base_dtype)
  beta_2_t = math_ops.cast(self.beta_2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self.epsilon_t, var.dtype.base_dtype)
  weight_decay_rate_t = math_ops.cast(
      self.weight_decay_rate_t, var.dtype.base_dtype)

  m = self.get_slot(var, 'm')
  v = self.get_slot(var, 'v')

  m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
  m_scaled_g_values = grad * (1 - beta_1_t)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
  v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  update = m_t / (math_ops.sqrt(v_t) + epsilon_t)

  if self._do_use_weight_decay(var.name):
    update += weight_decay_rate_t * var

  update_with_lr = learning_rate_t * update

  var_update = state_ops.assign_sub(var,
                                    update_with_lr,
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example #18
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #19
Source File: AMSGrad.py From PhysNet with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #20
Source File: AMSGrad.py From PhysNet with MIT License

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  # amsgrad
  vhat = self.get_slot(var, "vhat")
  vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
  v_sqrt = math_ops.sqrt(vhat_t)

  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #21
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #22
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0

def _finish(self, update_ops, name_scope):
  # Update the power accumulators.
  with ops.control_dependencies(update_ops):
    with ops.colocate_with(self._beta1_power):
      update_beta1 = self._beta1_power.assign(
          self._beta1_power * self._beta1_t,
          use_locking=self._use_locking)
      update_beta2 = self._beta2_power.assign(
          self._beta2_power * self._beta2_t,
          use_locking=self._use_locking)
  return control_flow_ops.group(*update_ops + [update_beta1, update_beta2],
                                name=name_scope)
Example #23
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))
Example #24
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  # amsgrad
  vhat = self.get_slot(var, "vhat")
  vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
  v_sqrt = math_ops.sqrt(vhat_t)

  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #25
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad
Example #26
Source File: adamW.py From Conditional_Density_Estimation with MIT License

def _resource_apply_sparse(self, grad, var, indices):
  scatter_add = self._resource_scatter_add
  decay_op = self._decay_weights_sparse_op(var, indices, scatter_add)
  with ops.control_dependencies([decay_op]):
    return super(DecoupledWeightDecayExtension, self)._resource_apply_sparse(
        grad, var, indices)
Example #27
Source File: adamW.py From Conditional_Density_Estimation with MIT License

def _resource_scatter_add(self, x, i, v, _=None):
  # last argument allows for one overflow argument, to have the same function
  # signature as state_ops.scatter_add
  with ops.control_dependencies(
      [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
    return x.value()
Example #28
Source File: adamW.py From Conditional_Density_Estimation with MIT License

def _apply_sparse(self, grad, var):
  scatter_add = state_ops.scatter_add
  decay_op = self._decay_weights_sparse_op(var, grad.indices, scatter_add)
  with ops.control_dependencies([decay_op]):
    return super(DecoupledWeightDecayExtension, self)._apply_sparse(
        grad, var)
Example #29
Source File: AMSGrad.py From scGAN with MIT License

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)

  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)

  # amsgrad
  vhat = self.get_slot(var, "vhat")
  vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
  v_sqrt = math_ops.sqrt(vhat_t)

  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #30
Source File: AMSGrad.py From DCRNN with MIT License

def _apply_sparse(self, grad, var):
  return self._apply_sparse_shared(
      grad.values, var, grad.indices,
      lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
          x, i, v, use_locking=self._use_locking))