Python tensorflow.python.ops.clip_ops.clip_by_value() Examples
The following are 30 code examples of tensorflow.python.ops.clip_ops.clip_by_value().
You may also want to check out all available functions and classes of the module tensorflow.python.ops.clip_ops.
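For orientation before the examples: clip_by_value(t, clip_value_min, clip_value_max) clamps every element of t into the closed interval [clip_value_min, clip_value_max]. A minimal sketch using the public tf.clip_by_value alias (TF 1.x graph mode assumed; the tensor values are illustrative):

import tensorflow as tf

t = tf.constant([-2.0, 0.5, 3.0])
clipped = tf.clip_by_value(t, clip_value_min=0.0, clip_value_max=1.0)  # element-wise clamp

with tf.Session() as sess:
    print(sess.run(clipped))  # [0.  0.5 1. ]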
Example #1
Source File: attention_wrapper.py From tf-var-attention with MIT License | 6 votes |
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values. As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))). This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.

  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(math_ops.cumsum(
        math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
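As the docstring notes, the clip to [tiny, 1] keeps log() finite even when x contains exact zeros, which is what makes the logspace cumprod safe to evaluate and differentiate. A small usage sketch of that idea with the public ops (illustrative values; TF 1.x assumed):

import numpy as np
import tensorflow as tf

x = tf.constant([0.5, 0.0, 0.25])
tiny = np.finfo(np.float32).tiny            # smallest positive normal float32
safe = tf.exp(tf.cumsum(tf.log(tf.clip_by_value(x, tiny, 1.0))))
# tf.cumprod(x) would produce [0.5, 0., 0.] and, as the docstring above warns,
# a numerically unstable gradient at the zero; the clipped logspace version
# only ever takes the log of strictly positive values.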
Example #2
Source File: attention_wrapper.py From CommonSenseMultiHopQA with MIT License | 6 votes |
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values. As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))). This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.

  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(math_ops.cumsum(
        math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
Example #3
Source File: backend.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def clip(x, min_value, max_value):
  """Element-wise value clipping.

  Arguments:
      x: Tensor or variable.
      min_value: Python float or integer.
      max_value: Python float or integer.

  Returns:
      A tensor.
  """
  if max_value is not None and max_value < min_value:
    max_value = min_value
  if max_value is None:
    max_value = np.inf
  min_value = _to_tensor(min_value, x.dtype.base_dtype)
  max_value = _to_tensor(max_value, x.dtype.base_dtype)
  return clip_ops.clip_by_value(x, min_value, max_value)
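For comparison, the public API call this backend helper ultimately reduces to once the Python bounds have been converted to tensors (values are illustrative):

import tensorflow as tf

x = tf.constant([-3.0, 0.2, 7.0])
y = tf.clip_by_value(x, 0.0, 1.0)   # -> [0., 0.2, 1.] when evaluated
# Like the helper above, a max_value smaller than min_value effectively clamps everything to min_value.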
Example #4
Source File: attention_wrapper.py From OpenSeq2Seq with Apache License 2.0 | 6 votes |
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values. As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))). This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.

  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(
        math_ops.cumsum(
            math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs
        )
    )
Example #5
Source File: attention_wrapper.py From QGforQA with MIT License | 6 votes |
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values. As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))). This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.

  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(math_ops.cumsum(
        math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
Example #6
Source File: attention_wrapper_mod.py From NQG_ASs2s with MIT License | 6 votes |
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values. As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))). This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.

  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(math_ops.cumsum(
        math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
Example #7
Source File: backend.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def relu(x, alpha=0., max_value=None):
  """Rectified linear unit.

  With default values, it returns element-wise `max(x, 0)`.

  Arguments:
      x: A tensor or variable.
      alpha: A scalar, slope of negative section (default=`0.`).
      max_value: Saturation threshold.

  Returns:
      A tensor.
  """
  if alpha != 0.:
    negative_part = nn.relu(-x)
  x = nn.relu(x)
  if max_value is not None:
    max_value = _to_tensor(max_value, x.dtype.base_dtype)
    zero = _to_tensor(0., x.dtype.base_dtype)
    x = clip_ops.clip_by_value(x, zero, max_value)
  if alpha != 0.:
    alpha = _to_tensor(alpha, x.dtype.base_dtype)
    x -= alpha * negative_part
  return x
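The max_value branch above turns the plain ReLU into a saturating one; for example, a ReLU6-style activation can be sketched directly with the public ops (illustrative input):

import tensorflow as tf

x = tf.constant([-1.0, 3.0, 10.0])
relu6 = tf.clip_by_value(tf.nn.relu(x), 0.0, 6.0)   # -> [0., 3., 6.] when evaluated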
Example #8
Source File: devel.py From avsr-tf1 with GNU General Public License v3.0 | 6 votes |
def mc_loss(labels, logits):
    r"""
    A multi-class cross-entropy loss
    :param labels: [batch_size, ] - Tensor of the correct class ids
    :param logits: [batch_size, num_classes] - Unscaled logits
    :return: [batch_size, ] - Tensor of average costs for each batch element
    """
    num_classes = array_ops.shape(logits)[1]
    onehot_labels = array_ops.one_hot(labels, num_classes, dtype=logits.dtype)

    p = nn_ops.softmax(logits)
    p = clip_ops.clip_by_value(p, 1e-7, 1.0 - 1e-7)

    ce_loss = - onehot_labels * math_ops.log(p) - (1 - onehot_labels) * math_ops.log(1.0 - p)
    cost = math_ops.reduce_sum(ce_loss, axis=1)

    return cost
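The clip of the softmax output to [1e-7, 1 - 1e-7] is what keeps both log(p) and log(1 - p) finite when the logits saturate; a tiny sketch of the failure mode it prevents (illustrative logits, TF 1.x assumed):

import tensorflow as tf

logits = tf.constant([[30.0, -30.0]])
p = tf.nn.softmax(logits)                                     # rounds to [[1., ~0.]] in float32
unsafe = -tf.log(1.0 - p)                                     # inf in the saturated column
safe = -tf.log(1.0 - tf.clip_by_value(p, 1e-7, 1.0 - 1e-7))   # finite everywhere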
Example #9
Source File: devel.py From avsr-tf1 with GNU General Public License v3.0 | 6 votes |
def focal_loss(labels, logits, gamma=2.0):
    r"""
    Multi-class focal loss implementation: https://arxiv.org/abs/1708.02002
    :param labels: [batch_size, ] - Tensor of the correct class ids
    :param logits: [batch_size, num_classes] - Unscaled logits
    :param gamma: focal loss weight
    :return: [batch_size, ] - Tensor of average costs for each batch element
    """
    num_classes = array_ops.shape(logits)[1]
    onehot_labels = array_ops.one_hot(labels, num_classes, dtype=logits.dtype)

    p = nn_ops.softmax(logits)
    p = clip_ops.clip_by_value(p, 1e-7, 1.0 - 1e-7)

    f_loss = - onehot_labels * math_ops.pow(1.0 - p, gamma) * math_ops.log(p) \
             - (1 - onehot_labels) * math_ops.pow(p, gamma) * math_ops.log(1.0 - p)
    cost = math_ops.reduce_sum(f_loss, axis=1)

    return cost
Example #10
Source File: backend.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def hard_sigmoid(x):
  """Segment-wise linear approximation of sigmoid.

  Faster than sigmoid.
  Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`.
  In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`.

  Arguments:
      x: A tensor or variable.

  Returns:
      A tensor.
  """
  x = (0.2 * x) + 0.5
  zero = _to_tensor(0., x.dtype.base_dtype)
  one = _to_tensor(1., x.dtype.base_dtype)
  x = clip_ops.clip_by_value(x, zero, one)
  return x
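The clip supplies the two flat segments of the piecewise approximation; a quick check with the public ops (illustrative inputs):

import tensorflow as tf

x = tf.constant([-4.0, 0.0, 4.0])
hard_sig = tf.clip_by_value(0.2 * x + 0.5, 0.0, 1.0)   # -> [0., 0.5, 1.] when evaluated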
Example #11
Source File: copy_attention_wrapper.py From question-generation with MIT License | 6 votes |
def safe_cumprod(x, *args, **kwargs):
  """Computes cumprod of x in logspace using cumsum to avoid underflow.

  The cumprod function and its gradient can result in numerical instabilities
  when its argument has very small and/or zero values. As long as the argument
  is all positive, we can instead compute the cumulative product as
  exp(cumsum(log(x))). This function can be called identically to tf.cumprod.

  Args:
    x: Tensor to take the cumulative product of.
    *args: Passed on to cumsum; these are identical to those in cumprod.
    **kwargs: Passed on to cumsum; these are identical to those in cumprod.

  Returns:
    Cumulative product of x.
  """
  with ops.name_scope(None, "SafeCumprod", [x]):
    x = ops.convert_to_tensor(x, name="x")
    tiny = np.finfo(x.dtype.as_numpy_dtype).tiny
    return math_ops.exp(math_ops.cumsum(
        math_ops.log(clip_ops.clip_by_value(x, tiny, 1)), *args, **kwargs))
Example #12
Source File: backend.py From lambda-packs with MIT License | 6 votes |
def hard_sigmoid(x):
  """Segment-wise linear approximation of sigmoid.

  Faster than sigmoid.
  Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`.
  In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`.

  Arguments:
      x: A tensor or variable.

  Returns:
      A tensor.
  """
  x = (0.2 * x) + 0.5
  zero = _to_tensor(0., x.dtype.base_dtype)
  one = _to_tensor(1., x.dtype.base_dtype)
  x = clip_ops.clip_by_value(x, zero, one)
  return x
Example #13
Source File: backend.py From lambda-packs with MIT License | 6 votes |
def binary_crossentropy(output, target, from_logits=False):
  """Binary crossentropy between an output tensor and a target tensor.

  Arguments:
      output: A tensor.
      target: A tensor with the same shape as `output`.
      from_logits: Whether `output` is expected to be a logits tensor.
          By default, we consider that `output` encodes a probability
          distribution.

  Returns:
      A tensor.
  """
  # Note: nn.softmax_cross_entropy_with_logits
  # expects logits, Keras expects probabilities.
  if not from_logits:
    # transform back to logits
    epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
    output = clip_ops.clip_by_value(output, epsilon, 1 - epsilon)
    output = math_ops.log(output / (1 - output))
  return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
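Here the clip makes the probability-to-logit inversion log(p / (1 - p)) well defined at p = 0 and p = 1. That step in isolation (the epsilon value below is the usual Keras default and is assumed, not taken from this file):

import tensorflow as tf

eps = 1e-7
p = tf.constant([0.0, 0.5, 1.0])
p = tf.clip_by_value(p, eps, 1.0 - eps)
logits = tf.log(p / (1.0 - p))   # finite logits; without the clip the endpoints give -inf / +inf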
Example #14
Source File: backend.py From lambda-packs with MIT License | 6 votes |
def relu(x, alpha=0., max_value=None):
  """Rectified linear unit.

  With default values, it returns element-wise `max(x, 0)`.

  Arguments:
      x: A tensor or variable.
      alpha: A scalar, slope of negative section (default=`0.`).
      max_value: Saturation threshold.

  Returns:
      A tensor.
  """
  if alpha != 0.:
    negative_part = nn.relu(-x)
  x = nn.relu(x)
  if max_value is not None:
    max_value = _to_tensor(max_value, x.dtype.base_dtype)
    zero = _to_tensor(0., x.dtype.base_dtype)
    x = clip_ops.clip_by_value(x, zero, max_value)
  if alpha != 0.:
    alpha = _to_tensor(alpha, x.dtype.base_dtype)
    x -= alpha * negative_part
  return x
Example #15
Source File: backend.py From lambda-packs with MIT License | 6 votes |
def clip(x, min_value, max_value):
  """Element-wise value clipping.

  Arguments:
      x: Tensor or variable.
      min_value: Python float or integer.
      max_value: Python float or integer.

  Returns:
      A tensor.
  """
  if max_value is not None and max_value < min_value:
    max_value = min_value
  if max_value is None:
    max_value = np.inf
  min_value = _to_tensor(min_value, x.dtype.base_dtype)
  max_value = _to_tensor(max_value, x.dtype.base_dtype)
  return clip_ops.clip_by_value(x, min_value, max_value)
Example #16
Source File: backend.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def sparse_categorical_crossentropy(target, output, from_logits=False):
  """Categorical crossentropy with integer targets.

  Arguments:
      target: An integer tensor.
      output: A tensor resulting from a softmax
          (unless `from_logits` is True, in which
          case `output` is expected to be the logits).
      from_logits: Boolean, whether `output` is the
          result of a softmax, or is a tensor of logits.

  Returns:
      Output tensor.
  """
  # Note: nn.sparse_softmax_cross_entropy_with_logits
  # expects logits, Keras expects probabilities.
  if not from_logits:
    epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
    output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
    output = math_ops.log(output)

  output_shape = output.get_shape()
  targets = cast(flatten(target), 'int64')
  logits = array_ops.reshape(output, [-1, int(output_shape[-1])])
  res = nn.sparse_softmax_cross_entropy_with_logits(
      labels=targets, logits=logits)
  if len(output_shape) == 3:
    # if our output includes timesteps we need to reshape
    return array_ops.reshape(res, array_ops.shape(output)[:-1])
  else:
    return res
Example #17
Source File: backend.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def categorical_crossentropy(target, output, from_logits=False):
  """Categorical crossentropy between an output tensor and a target tensor.

  Arguments:
      target: A tensor of the same shape as `output`.
      output: A tensor resulting from a softmax
          (unless `from_logits` is True, in which
          case `output` is expected to be the logits).
      from_logits: Boolean, whether `output` is the
          result of a softmax, or is a tensor of logits.

  Returns:
      Output tensor.
  """
  # Note: nn.softmax_cross_entropy_with_logits
  # expects logits, Keras expects probabilities.
  if not from_logits:
    # scale preds so that the class probas of each sample sum to 1
    output /= math_ops.reduce_sum(
        output, axis=len(output.get_shape()) - 1, keep_dims=True)
    # manual computation of crossentropy
    epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
    output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
    return -math_ops.reduce_sum(
        target * math_ops.log(output),
        axis=len(output.get_shape()) - 1)
  else:
    return nn.softmax_cross_entropy_with_logits(labels=target, logits=output)
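The same pattern in miniature: normalize the predictions, clip away exact zeros and ones, then take the log (illustrative one-hot target and unnormalized prediction; TF 1.x assumed):

import tensorflow as tf

target = tf.constant([[0.0, 1.0, 0.0]])
output = tf.constant([[0.0, 2.0, 0.0]])                       # contains exact zeros
output /= tf.reduce_sum(output, axis=-1, keep_dims=True)      # class probabilities sum to 1
output = tf.clip_by_value(output, 1e-7, 1.0 - 1e-7)
loss = -tf.reduce_sum(target * tf.log(output), axis=-1)       # finite even for the zero entries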
Example #18
Source File: backend.py From lambda-packs with MIT License | 5 votes |
def categorical_crossentropy(output, target, from_logits=False):
  """Categorical crossentropy between an output tensor and a target tensor.

  Arguments:
      output: A tensor resulting from a softmax
          (unless `from_logits` is True, in which
          case `output` is expected to be the logits).
      target: A tensor of the same shape as `output`.
      from_logits: Boolean, whether `output` is the
          result of a softmax, or is a tensor of logits.

  Returns:
      Output tensor.
  """
  # Note: nn.softmax_cross_entropy_with_logits
  # expects logits, Keras expects probabilities.
  if not from_logits:
    # scale preds so that the class probas of each sample sum to 1
    output /= math_ops.reduce_sum(
        output, reduction_indices=len(output.get_shape()) - 1, keep_dims=True)
    # manual computation of crossentropy
    epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
    output = clip_ops.clip_by_value(output, epsilon, 1. - epsilon)
    return -math_ops.reduce_sum(
        target * math_ops.log(output),
        reduction_indices=len(output.get_shape()) - 1)
  else:
    return nn.softmax_cross_entropy_with_logits(labels=target, logits=output)
Example #19
Source File: backend.py From lambda-packs with MIT License | 5 votes |
def sqrt(x):
  """Element-wise square root.

  Arguments:
      x: Tensor or variable.

  Returns:
      A tensor.
  """
  zero = _to_tensor(0., x.dtype.base_dtype)
  inf = _to_tensor(np.inf, x.dtype.base_dtype)
  x = clip_ops.clip_by_value(x, zero, inf)
  return math_ops.sqrt(x)
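Clipping to [0, inf) before the square root guards against tiny negative inputs (for example from floating-point round-off) turning into NaN; a minimal sketch:

import numpy as np
import tensorflow as tf

x = tf.constant([-1e-8, 4.0])                    # small negative from round-off
y = tf.sqrt(tf.clip_by_value(x, 0.0, np.inf))    # -> [0., 2.] instead of [nan, 2.]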
Example #20
Source File: backend.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def sqrt(x):
  """Element-wise square root.

  Arguments:
      x: Tensor or variable.

  Returns:
      A tensor.
  """
  zero = _to_tensor(0., x.dtype.base_dtype)
  inf = _to_tensor(np.inf, x.dtype.base_dtype)
  x = clip_ops.clip_by_value(x, zero, inf)
  return math_ops.sqrt(x)
Example #21
Source File: image_ops_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def adjust_saturation(image, saturation_factor, name=None):
  """Adjust saturation of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, adds an offset to the saturation channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image. The image saturation is adjusted by converting the
  image to HSV and multiplying the saturation (S) channel by
  `saturation_factor` and clipping. The image is then converted back to RGB.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
    fused = fused.lower() in ('true', 't', '1')

    if fused:
      return convert_image_dtype(
          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
          orig_dtype)

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

    saturation *= saturation_factor
    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 2)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
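The clip keeps the scaled S channel a valid HSV saturation in [0, 1]. The core of the non-fused path can be sketched with the public image ops (a random image and a factor of 1.8 are assumed for illustration):

import tensorflow as tf

image = tf.random_uniform([64, 64, 3])                   # float RGB in [0, 1]
hsv = tf.image.rgb_to_hsv(image)
hue, sat, val = tf.split(hsv, 3, axis=-1)
sat = tf.clip_by_value(sat * 1.8, 0.0, 1.0)              # boost saturation, then clamp to a valid range
adjusted = tf.image.hsv_to_rgb(tf.concat([hue, sat, val], axis=-1))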
Example #22
Source File: official_tf_image.py From X-Detector with Apache License 2.0 | 4 votes |
def adjust_saturation(image, saturation_factor, name=None):
  """Adjust saturation of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, adds an offset to the saturation channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image. The image saturation is adjusted by converting the
  image to HSV and multiplying the saturation (S) channel by
  `saturation_factor` and clipping. The image is then converted back to RGB.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
    fused = fused.lower() in ('true', 't', '1')

    if fused:
      return convert_image_dtype(
          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
          orig_dtype)

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

    saturation *= saturation_factor
    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 2)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
Example #23
Source File: image_ops_impl.py From keras-lambda with MIT License | 4 votes |
def adjust_saturation(image, saturation_factor, name=None):
  """Adjust saturation of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, adds an offset to the saturation channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image. The image saturation is adjusted by converting the
  image to HSV and multiplying the saturation (S) channel by
  `saturation_factor` and clipping. The image is then converted back to RGB.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Remember original dtype so we can convert back if needed
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
    fused = fused.lower() in ('true', 't', '1')

    if fused:
      return convert_image_dtype(
          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
          orig_dtype)

    hsv = gen_image_ops.rgb_to_hsv(flt_image)

    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

    saturation *= saturation_factor
    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 2)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
Example #24
Source File: AdaBound.py From HyperGAN with MIT License | 4 votes |
def _apply(self, grad, var):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)

    lr_t = lr_t * tf.sqrt(1 - beta2_t) / (1 - beta1_t)

    lower_bound = lr_t * self._lower_bound
    upper_bound = lr_t * self._upper_bound

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = lr_t / (v_sqrt + epsilon_t)
    if isinstance(self.config.lower_bound, int) and self.config.lower_bound < 0:
        bounded_lr = m_t * step_size_bound
    else:
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    if self._arad:
        bounded_lr *= (self.config.arad_lambda or 1.0) * tf.abs(m_t)

    var_update = state_ops.assign_sub(var, bounded_lr,
                                      use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
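The clip_by_value call is the central AdaBound step: the element-wise Adam step size is clamped between a lower and an upper bound so the update behaves more and more like plain SGD as the bounds tighten. A stripped-down sketch of that single step with hypothetical tensors and bounds (not this optimizer's actual state):

import tensorflow as tf

m_t = tf.constant([0.1, -0.2])            # first-moment (momentum) estimate
v_sqrt = tf.constant([1e-4, 0.5])         # sqrt of the second-moment estimate
lr, eps = 0.001, 1e-8
lower_bound, upper_bound = 0.005, 0.05    # hypothetical bounds

step_size = lr / (v_sqrt + eps)                                         # raw per-parameter step size
update = m_t * tf.clip_by_value(step_size, lower_bound, upper_bound)    # bounded update applied to var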
Example #25
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0 | 4 votes |
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph),
                                var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph),
                                var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_t + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_t))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #26
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0 | 4 votes |
def _resource_apply_dense(self, grad, var):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph),
                                grad.dtype.base_dtype)
    beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph),
                                grad.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
    gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph),
                                var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #27
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0 | 4 votes |
def _apply_dense(self, grad, var):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph),
                                var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph),
                                var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph),
                                var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #28
Source File: indRNN.py From Text-Classification with Apache License 2.0 | 4 votes |
def build(self, inputs_shape):
    '''construct the IndRNN Cell'''
    if inputs_shape[1].value is None:
        raise ValueError("Expected input shape[1] is known")

    input_depth = inputs_shape[1]
    if self._input_kernel_initializer is None:
        self._input_kernel_initializer = init_ops.random_normal_initializer(mean=0,
                                                                            stddev=1e-3)
    # matrix W
    self._input_kernel = self.add_variable(
        "input_kernel",
        shape=[input_depth, self._num_units],
        initializer=self._input_kernel_initializer
    )

    if self._recurrent_recurrent_kernel_initializer is None:
        self._recurrent_recurrent_kernel_initializer = init_ops.constant_initializer(1.)

    # matrix U
    self._recurrent_kernel = self.add_variable(
        "recurrent_kernel",
        shape=[self._num_units],
        initializer=self._recurrent_recurrent_kernel_initializer
    )

    # Clip the U to min - max
    if self._recurrent_min_abs:
        abs_kernel = math_ops.abs(self._recurrent_kernel)
        min_abs_kernel = math_ops.maximum(abs_kernel, self._recurrent_min_abs)
        self._recurrent_kernel = math_ops.multiply(
            math_ops.sign(self._recurrent_kernel),
            min_abs_kernel
        )
    if self._recurrent_max_abs:
        self._recurrent_kernel = clip_ops.clip_by_value(
            self._recurrent_kernel,
            -self._recurrent_max_abs,
            self._recurrent_max_abs
        )

    self._bias = self.add_variable(
        "bias",
        shape=[self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype)
    )

    # built finished
    self.built = True
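In this cell the clip constrains the recurrent weight vector itself rather than an activation, keeping each unit's recurrent gain inside [-recurrent_max_abs, recurrent_max_abs]. The constraint in isolation (hypothetical weights):

import tensorflow as tf

recurrent_kernel = tf.constant([2.5, -0.1, -3.0])
max_abs = 1.0
constrained = tf.clip_by_value(recurrent_kernel, -max_abs, max_abs)   # -> [1., -0.1, -1.]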
Example #29
Source File: AdaBound.py From captcha_trainer with Apache License 2.0 | 4 votes |
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
    else:
        graph = ops.get_default_graph()
    beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph),
                                var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph),
                                var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_t + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_t))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #30
Source File: AdaBound.py From captcha_trainer with Apache License 2.0 | 4 votes |
def _resource_apply_dense(self, grad, var):
    if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
    else:
        graph = ops.get_default_graph()
    beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph),
                                grad.dtype.base_dtype)
    beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph),
                                grad.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
    gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph),
                                var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])