Python tensorflow.python.ops.math_ops.maximum() Examples
The following are 30
code examples of tensorflow.python.ops.math_ops.maximum().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
tensorflow.python.ops.math_ops
, or try the search function
.
Example #1
Source File: odes.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def _optimal_step_size(last_step, error_ratio, safety=0.9, ifactor=10.0, dfactor=0.2, order=5, name=None): """Calculate the optimal size for the next Runge-Kutta step.""" with ops.name_scope( name, 'optimal_step_size', [last_step, error_ratio]) as scope: error_ratio = math_ops.cast(error_ratio, last_step.dtype) exponent = math_ops.cast(1 / order, last_step.dtype) # this looks more complex than necessary, but importantly it keeps # error_ratio in the numerator so we can't divide by zero: factor = math_ops.maximum( 1 / ifactor, math_ops.minimum(error_ratio ** exponent / safety, 1 / dfactor)) return math_ops.div(last_step, factor, name=scope)
Example #2
Source File: tf_image.py From pixel_link with MIT License | 6 votes |
def rotate90(bboxes, xs, ys, k): # bboxes = tf.Print(bboxes, [bboxes], 'before rotate',summarize = 100) ymin, xmin, ymax, xmax = [bboxes[:, i] for i in range(4)] xmin, ymin = tf_rotate_point_by_90(xmin, ymin, k) xmax, ymax = tf_rotate_point_by_90(xmax, ymax, k) new_xmin = tf.minimum(xmin, xmax) new_xmax = tf.maximum(xmin, xmax) new_ymin = tf.minimum(ymin, ymax) new_ymax = tf.maximum(ymin, ymax) bboxes = tf.stack([new_ymin, new_xmin, new_ymax, new_xmax]) bboxes = tf.transpose(bboxes) xs, ys = tf_rotate_point_by_90(xs, ys, k) return bboxes, xs, ys
Example #3
Source File: odes.py From deep_image_model with Apache License 2.0 | 6 votes |
def _optimal_step_size(last_step, error_ratio, safety=0.9, ifactor=10.0, dfactor=0.2, order=5, name=None): """Calculate the optimal size for the next Runge-Kutta step.""" with ops.name_scope( name, 'optimal_step_size', [last_step, error_ratio]) as scope: error_ratio = math_ops.cast(error_ratio, last_step.dtype) exponent = math_ops.cast(1 / order, last_step.dtype) # this looks more complex than necessary, but importantly it keeps # error_ratio in the numerator so we can't divide by zero: factor = math_ops.maximum( 1 / ifactor, math_ops.minimum(error_ratio ** exponent / safety, 1 / dfactor)) return math_ops.div(last_step, factor, name=scope)
Example #4
Source File: layers.py From tensornets with MIT License | 6 votes |
def _lower_bound(inputs, bound, name=None): """Same as tf.maximum, but with helpful gradient for inputs < bound. The gradient is overwritten so that it is passed through if the input is not hitting the bound. If it is, only gradients that push `inputs` higher than the bound are passed through. No gradients are passed through to the bound. Args: inputs: input tensor bound: lower bound for the input tensor name: name for this op Returns: tf.maximum(inputs, bound) """ with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope: inputs = ops.convert_to_tensor(inputs, name='inputs') bound = ops.convert_to_tensor(bound, name='bound') with ops.get_default_graph().gradient_override_map( {'Maximum': 'GDNLowerBound'}): return math_ops.maximum(inputs, bound, name=scope)
Example #5
Source File: math_grad.py From deep_image_model with Apache License 2.0 | 6 votes |
def _SegmentMinOrMaxGrad(op, grad): """Gradient for SegmentMin and SegmentMax. Both share the same code.""" zeros = array_ops.zeros(array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype) # Get the number of selected (minimum or maximum) elements in each segment. gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1]) is_selected = math_ops.equal(op.inputs[0], gathered_outputs) num_selected = math_ops.segment_sum(math_ops.cast(is_selected, grad.dtype), op.inputs[1]) # Compute the gradient for each segment. The gradient for the ith segment is # divided evenly among the selected elements in that segment. weighted_grads = math_ops.div(grad, num_selected) gathered_grads = array_ops.gather(weighted_grads, op.inputs[1]) return math_ops.select(is_selected, gathered_grads, zeros), None
Example #6
Source File: math_grad.py From deep_image_model with Apache License 2.0 | 6 votes |
def _MinOrMaxGrad(op, grad): """Gradient for Min or Max. Amazingly it's precisely the same code.""" input_shape = array_ops.shape(op.inputs[0]) output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) y = op.outputs[0] y = array_ops.reshape(y, output_shape_kept_dims) grad = array_ops.reshape(grad, output_shape_kept_dims) # Compute the number of selected (maximum or minimum) elements in each # reduction dimension. If there are multiple minimum or maximum elements # then the gradient will be divided between them. indicators = math_ops.cast(math_ops.equal(y, op.inputs[0]), grad.dtype) num_selected = array_ops.reshape( math_ops.reduce_sum(indicators, op.inputs[1]), output_shape_kept_dims) return [math_ops.div(indicators, num_selected) * grad, None]
Example #7
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0 | 6 votes |
def _apply_dense(self, grad, var): lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) if var.dtype.base_dtype == tf.float16: eps = 1e-7 # Can't use 1e-8 due to underflow -- not sure if it makes a big difference. else: eps = 1e-8 v = self.get_slot(var, "v") v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad) m = self.get_slot(var, "m") m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad))) g_t = v_t / m_t var_update = state_ops.assign_sub(var, lr_t * g_t) return control_flow_ops.group(*[var_update, m_t, v_t])
Example #8
Source File: spectral_ops.py From lambda-packs with MIT License | 6 votes |
def _infer_fft_length_for_irfft(input_tensor, fft_rank): """Infers the `fft_length` argument for a `rank` IRFFT from `input_tensor`.""" # A TensorShape for the inner fft_rank dimensions. fft_shape = input_tensor.get_shape()[-fft_rank:] # If any dim is unknown, fall back to tensor-based math. if not fft_shape.is_fully_defined(): fft_length = _array_ops.unstack(_array_ops.shape(input_tensor)[-fft_rank:]) fft_length[-1] = _math_ops.maximum(0, 2 * (fft_length[-1] - 1)) return _array_ops.stack(fft_length) # Otherwise, return a constant. fft_length = fft_shape.as_list() if fft_length: fft_length[-1] = max(0, 2 * (fft_length[-1] - 1)) return _ops.convert_to_tensor(fft_length, _dtypes.int32)
Example #9
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _MinOrMaxGrad(op, grad): """Gradient for Min or Max. Amazingly it's precisely the same code.""" input_shape = array_ops.shape(op.inputs[0]) output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) y = op.outputs[0] y = array_ops.reshape(y, output_shape_kept_dims) grad = array_ops.reshape(grad, output_shape_kept_dims) # Compute the number of selected (maximum or minimum) elements in each # reduction dimension. If there are multiple minimum or maximum elements # then the gradient will be divided between them. indicators = math_ops.cast(math_ops.equal(y, op.inputs[0]), grad.dtype) num_selected = array_ops.reshape( math_ops.reduce_sum(indicators, op.inputs[1]), output_shape_kept_dims) return [math_ops.div(indicators, num_selected) * grad, None]
Example #10
Source File: math_grad.py From lambda-packs with MIT License | 6 votes |
def _SegmentMinOrMaxGrad(op, grad, is_sorted): """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code.""" zeros = array_ops.zeros(array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype) # Get the number of selected (minimum or maximum) elements in each segment. gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1]) is_selected = math_ops.equal(op.inputs[0], gathered_outputs) if is_sorted: num_selected = math_ops.segment_sum(math_ops.cast(is_selected, grad.dtype), op.inputs[1]) else: num_selected = math_ops.unsorted_segment_sum(math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2]) # Compute the gradient for each segment. The gradient for the ith segment is # divided evenly among the selected elements in that segment. weighted_grads = math_ops.div(grad, num_selected) gathered_grads = array_ops.gather(weighted_grads, op.inputs[1]) if is_sorted: return array_ops.where(is_selected, gathered_grads, zeros), None else: return array_ops.where(is_selected, gathered_grads, zeros), None, None
Example #11
Source File: backend.py From lambda-packs with MIT License | 6 votes |
def max(x, axis=None, keepdims=False): """Maximum value in a tensor. Arguments: x: A tensor or variable. axis: An integer, the axis to find maximum values. keepdims: A boolean, whether to keep the dimensions or not. If `keepdims` is `False`, the rank of the tensor is reduced by 1. If `keepdims` is `True`, the reduced dimension is retained with length 1. Returns: A tensor with maximum values of `x`. """ axis = _normalize_axis(axis, ndim(x)) return math_ops.reduce_max(x, reduction_indices=axis, keep_dims=keepdims)
Example #12
Source File: metric_loss_ops.py From cluster-loss-tensorflow with BSD 2-Clause "Simplified" License | 6 votes |
def masked_maximum(data, mask, dim=1): """Computes the axis wise maximum over chosen elements. Args: data: 2-D float `Tensor` of size [n, m]. mask: 2-D Boolean `Tensor` of size [n, m]. dim: The dimension over which to compute the maximum. Returns: masked_maximums: N-D `Tensor`. The maximized dimension is of size 1 after the operation. """ axis_minimums = math_ops.reduce_min(data, dim, keep_dims=True) masked_maximums = math_ops.reduce_max( math_ops.multiply( data - axis_minimums, mask), dim, keep_dims=True) + axis_minimums return masked_maximums
Example #13
Source File: odes.py From lambda-packs with MIT License | 6 votes |
def _optimal_step_size(last_step, error_ratio, safety=0.9, ifactor=10.0, dfactor=0.2, order=5, name=None): """Calculate the optimal size for the next Runge-Kutta step.""" with ops.name_scope( name, 'optimal_step_size', [last_step, error_ratio]) as scope: error_ratio = math_ops.cast(error_ratio, last_step.dtype) exponent = math_ops.cast(1 / order, last_step.dtype) # this looks more complex than necessary, but importantly it keeps # error_ratio in the numerator so we can't divide by zero: factor = math_ops.maximum( 1 / ifactor, math_ops.minimum(error_ratio ** exponent / safety, 1 / dfactor)) return math_ops.div(last_step, factor, name=scope)
Example #14
Source File: metric_learning.py From tf-slim with Apache License 2.0 | 6 votes |
def masked_maximum(data, mask, dim=1): """Computes the axis wise maximum over chosen elements. Args: data: 2-D float `Tensor` of size [n, m]. mask: 2-D Boolean `Tensor` of size [n, m]. dim: The dimension over which to compute the maximum. Returns: masked_maximums: N-D `Tensor`. The maximized dimension is of size 1 after the operation. """ axis_minimums = math_ops.reduce_min(data, dim, keepdims=True) masked_maximums = math_ops.reduce_max( math_ops.multiply(data - axis_minimums, mask), dim, keepdims=True) + axis_minimums return masked_maximums
Example #15
Source File: layers.py From tf-slim with Apache License 2.0 | 6 votes |
def _lower_bound(inputs, bound, name=None): """Same as tf.maximum, but with helpful gradient for inputs < bound. The gradient is overwritten so that it is passed through if the input is not hitting the bound. If it is, only gradients that push `inputs` higher than the bound are passed through. No gradients are passed through to the bound. Args: inputs: input tensor bound: lower bound for the input tensor name: name for this op Returns: tf.maximum(inputs, bound) """ with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope: inputs = ops.convert_to_tensor(inputs, name='inputs') bound = ops.convert_to_tensor(bound, name='bound') with ops.get_default_graph().gradient_override_map( {'Maximum': 'GDNLowerBound'}): return math_ops.maximum(inputs, bound, name=scope)
Example #16
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def _MinOrMaxGrad(op, grad): """Gradient for Min or Max. Amazingly it's precisely the same code.""" input_shape = array_ops.shape(op.inputs[0]) output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) y = op.outputs[0] y = array_ops.reshape(y, output_shape_kept_dims) grad = array_ops.reshape(grad, output_shape_kept_dims) # Compute the number of selected (maximum or minimum) elements in each # reduction dimension. If there are multiple minimum or maximum elements # then the gradient will be divided between them. indicators = math_ops.cast(math_ops.equal(y, op.inputs[0]), grad.dtype) num_selected = array_ops.reshape( math_ops.reduce_sum(indicators, op.inputs[1]), output_shape_kept_dims) return [math_ops.div(indicators, num_selected) * grad, None]
Example #17
Source File: math_grad.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def _SegmentMinOrMaxGrad(op, grad): """Gradient for SegmentMin and SegmentMax. Both share the same code.""" zeros = array_ops.zeros( array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype) # Get the number of selected (minimum or maximum) elements in each segment. gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1]) is_selected = math_ops.equal(op.inputs[0], gathered_outputs) num_selected = math_ops.segment_sum( math_ops.cast(is_selected, grad.dtype), op.inputs[1]) # Compute the gradient for each segment. The gradient for the ith segment is # divided evenly among the selected elements in that segment. weighted_grads = math_ops.div(grad, num_selected) gathered_grads = array_ops.gather(weighted_grads, op.inputs[1]) return array_ops.where(is_selected, gathered_grads, zeros), None
Example #18
Source File: factorization_ops.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def _get_sharding_func(size, num_shards): """Create sharding function for scatter update.""" def func(ids): if num_shards == 1: return None, ids else: ids_per_shard = size // num_shards extras = size % num_shards assignments = math_ops.maximum(ids // (ids_per_shard + 1), (ids - extras) // ids_per_shard) new_ids = array_ops.where(assignments < extras, ids % (ids_per_shard + 1), (ids - extras) % ids_per_shard) return assignments, new_ids return func
Example #19
Source File: core_test.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def setUp(self): super(FloatBinaryOpsTest, self).setUp() self.ops = [ ('igamma', None, math_ops.igamma, core.igamma), ('igammac', None, math_ops.igammac, core.igammac), ('zeta', None, math_ops.zeta, core.zeta), ('polygamma', None, math_ops.polygamma, core.polygamma), ('maximum', None, math_ops.maximum, core.maximum), ('minimum', None, math_ops.minimum, core.minimum), ('squared_difference', None, math_ops.squared_difference, core.squared_difference), ] total_size = np.prod([v.size for v in self.original_lt.axes.values()]) test_lt = core.LabeledTensor( math_ops.cast(self.original_lt, dtypes.float32) / total_size, self.original_lt.axes) self.test_lt_1 = test_lt self.test_lt_2 = 1.0 - test_lt self.test_lt_1_broadcast = self.test_lt_1.tensor self.test_lt_2_broadcast = self.test_lt_2.tensor self.broadcast_axes = self.test_lt_1.axes
Example #20
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0 | 5 votes |
def _apply_sparse_shared(self, grad, var, indices, scatter_add): beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) # m_t = beta1 * m + (1 - beta1) * g_t m = self.get_slot(var, "m") m_scaled_g_values = grad * (1 - beta1_t) m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking) with ops.control_dependencies([m_t]): m_t = scatter_add(m, indices, m_scaled_g_values) # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) v = self.get_slot(var, "v") v_scaled_g_values = (grad * grad) * (1 - beta2_t) v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking) with ops.control_dependencies([v_t]): v_t = scatter_add(v, indices, v_scaled_g_values) # amsgrad vhat = self.get_slot(var, "vhat") vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) v_sqrt = math_ops.sqrt(vhat_t) var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #21
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0 | 5 votes |
def _apply_dense(self, grad, var): beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) # m_t = beta1 * m + (1 - beta1) * g_t m = self.get_slot(var, "m") m_scaled_g_values = grad * (1 - beta1_t) m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) v = self.get_slot(var, "v") v_scaled_g_values = (grad * grad) * (1 - beta2_t) v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) # amsgrad vhat = self.get_slot(var, "vhat") vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) v_sqrt = math_ops.sqrt(vhat_t) var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #22
Source File: RankLSTM_model.py From Deep-Listwise-Context-Model-for-Ranking-Refinement with Apache License 2.0 | 5 votes |
def clip_by_each_value(self, t_list, clip_max_value = None, clip_min_value = None, name=None): if (not isinstance(t_list, collections.Sequence) or isinstance(t_list, six.string_types)): raise TypeError("t_list should be a sequence") t_list = list(t_list) with ops.name_scope(name, "clip_by_each_value",t_list + [clip_norm]) as name: values = [ ops.convert_to_tensor( t.values if isinstance(t, ops.IndexedSlices) else t, name="t_%d" % i) if t is not None else t for i, t in enumerate(t_list)] values_clipped = [] for i, v in enumerate(values): if v is None: values_clipped.append(None) else: t = None if clip_value_max != None: t = math_ops.minimum(v, clip_value_max) if clip_value_min != None: t = math_ops.maximum(t, clip_value_min, name=name) with ops.colocate_with(t): values_clipped.append( tf.identity(t, name="%s_%d" % (name, i))) list_clipped = [ ops.IndexedSlices(c_v, t.indices, t.dense_shape) if isinstance(t, ops.IndexedSlices) else c_v for (c_v, t) in zip(values_clipped, t_list)] return list_clipped
Example #23
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0 | 5 votes |
def _resource_apply_dense(self, grad, var): var = var.handle beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype) beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype) lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype) epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) # m_t = beta1 * m + (1 - beta1) * g_t m = self.get_slot(var, "m").handle m_scaled_g_values = grad * (1 - beta1_t) m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) v = self.get_slot(var, "v").handle v_scaled_g_values = (grad * grad) * (1 - beta2_t) v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) # amsgrad vhat = self.get_slot(var, "vhat").handle vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) v_sqrt = math_ops.sqrt(vhat_t) var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #24
Source File: ctc_model.py From tf-end-to-end with MIT License | 5 votes |
def leaky_relu(features, alpha=0.2, name=None): with ops.name_scope(name, "LeakyRelu", [features, alpha]): features = ops.convert_to_tensor(features, name="features") alpha = ops.convert_to_tensor(alpha, name="alpha") return math_ops.maximum(alpha * features, features) # # params["height"] = height of the input image # params["width"] = width of the input image
Example #25
Source File: AMSGrad.py From scGAN with MIT License | 5 votes |
def _resource_apply_dense(self, grad, var): var = var.handle beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype) beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype) lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype) epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) # m_t = beta1 * m + (1 - beta1) * g_t m = self.get_slot(var, "m").handle m_scaled_g_values = grad * (1 - beta1_t) m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) v = self.get_slot(var, "v").handle v_scaled_g_values = (grad * grad) * (1 - beta2_t) v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) # amsgrad vhat = self.get_slot(var, "vhat").handle vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) v_sqrt = math_ops.sqrt(vhat_t) var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #26
Source File: interpolate_spline.py From WarpGAN with MIT License | 5 votes |
def _phi(r, order): """Coordinate-wise nonlinearity used to define the order of the interpolation. See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition. Args: r: input op order: interpolation order Returns: phi_k evaluated coordinate-wise on r, for k = r """ # using EPSILON prevents log(0), sqrt0), etc. # sqrt(0) is well-defined, but its gradient is not with ops.name_scope('phi'): if order == 1: r = math_ops.maximum(r, EPSILON) r = math_ops.sqrt(r) return r elif order == 2: return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON)) elif order == 4: return 0.5 * math_ops.square(r) * math_ops.log( math_ops.maximum(r, EPSILON)) elif order % 2 == 0: r = math_ops.maximum(r, EPSILON) return 0.5 * math_ops.pow(r, 0.5 * order) * math_ops.log(r) else: r = math_ops.maximum(r, EPSILON) return math_ops.pow(r, 0.5 * order)
Example #27
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0 | 5 votes |
def _resource_apply_dense(self, grad, var): var = var.handle beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype) beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype) lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype) epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) # m_t = beta1 * m + (1 - beta1) * g_t m = self.get_slot(var, "m").handle m_scaled_g_values = grad * (1 - beta1_t) m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) v = self.get_slot(var, "v").handle v_scaled_g_values = (grad * grad) * (1 - beta2_t) v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) # amsgrad vhat = self.get_slot(var, "vhat").handle vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) v_sqrt = math_ops.sqrt(vhat_t) var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #28
Source File: metric_learning.py From tf-slim with Apache License 2.0 | 5 votes |
def contrastive_loss(labels, embeddings_anchor, embeddings_positive, margin=1.0): """Computes the contrastive loss. This loss encourages the embedding to be close to each other for the samples of the same label and the embedding to be far apart at least by the margin constant for the samples of different labels. See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of binary labels indicating positive vs negative pair. embeddings_anchor: 2-D float `Tensor` of embedding vectors for the anchor images. Embeddings should be l2 normalized. embeddings_positive: 2-D float `Tensor` of embedding vectors for the positive images. Embeddings should be l2 normalized. margin: margin term in the loss definition. Returns: contrastive_loss: tf.float32 scalar. """ # Get per pair distances distances = math_ops.sqrt( math_ops.reduce_sum( math_ops.squared_difference(embeddings_anchor, embeddings_positive), 1)) # Add contrastive loss for the siamese network. # label here is {0,1} for neg, pos. return math_ops.reduce_mean( math_ops.cast(labels, distances.dtype) * math_ops.square(distances) + (1. - math_ops.cast(labels, distances.dtype)) * math_ops.square(math_ops.maximum(margin - distances, 0.)), name='contrastive_loss')
Example #29
Source File: metric_learning.py From tf-slim with Apache License 2.0 | 5 votes |
def _compute_vmeasure_score(labels, predictions): vmeasure_score = math_ops.cast( script_ops.py_func( metrics.v_measure_score, [labels, predictions], [dtypes.float64], name='vmeasure'), dtypes.float32) return math_ops.maximum(0.0, vmeasure_score)
Example #30
Source File: opt.py From EMNLP2018_NLI with GNU General Public License v3.0 | 5 votes |
def _apply_dense(self, grad, var): beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) # m_t = beta1 * m + (1 - beta1) * g_t m = self.get_slot(var, "m") m_scaled_g_values = grad * (1 - beta1_t) m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) v = self.get_slot(var, "v") v_scaled_g_values = (grad * grad) * (1 - beta2_t) v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) # amsgrad vhat = self.get_slot(var, "vhat") vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) v_sqrt = math_ops.sqrt(vhat_t) var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])