Python tensorflow.python.ops.math_ops.maximum() Examples

The following are 30 code examples of tensorflow.python.ops.math_ops.maximum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.math_ops, or try the search function.
Example #1
Source File: odes.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def _optimal_step_size(last_step,
                       error_ratio,
                       safety=0.9,
                       ifactor=10.0,
                       dfactor=0.2,
                       order=5,
                       name=None):
  """Return the step size to use for the next Runge-Kutta step.

  `last_step` is divided by a factor computed from `error_ratio`. The factor
  is bounded below by `1 / ifactor` and above by `1 / dfactor`.
  """
  with ops.name_scope(
      name, 'optimal_step_size', [last_step, error_ratio]) as scope:
    step_dtype = last_step.dtype
    error_ratio = math_ops.cast(error_ratio, step_dtype)
    exponent = math_ops.cast(1 / order, step_dtype)
    # The factor is built with error_ratio in the numerator on purpose: that
    # way a zero error_ratio never ends up in a denominator.
    smallest_factor = 1 / ifactor
    unbounded_factor = error_ratio ** exponent / safety
    bounded_above = math_ops.minimum(unbounded_factor, 1 / dfactor)
    factor = math_ops.maximum(smallest_factor, bounded_above)
    return math_ops.div(last_step, factor, name=scope)
Example #2
Source File: tf_image.py    From pixel_link with MIT License 6 votes vote down vote up
def rotate90(bboxes, xs, ys, k):
    """Rotate bounding boxes and points with `tf_rotate_point_by_90`.

    The four columns of `bboxes` are read as (ymin, xmin, ymax, xmax). Both
    box corners are rotated, then re-ordered so that each min coordinate is
    again the smaller of the two. `xs`/`ys` are rotated with the same `k`.

    Returns:
      (bboxes, xs, ys) after rotation; `bboxes` keeps the
      (ymin, xmin, ymax, xmax) column order.
    """
    ymin = bboxes[:, 0]
    xmin = bboxes[:, 1]
    ymax = bboxes[:, 2]
    xmax = bboxes[:, 3]

    xmin, ymin = tf_rotate_point_by_90(xmin, ymin, k)
    xmax, ymax = tf_rotate_point_by_90(xmax, ymax, k)

    # After rotation the "min" corner may no longer hold the smaller values.
    left = tf.minimum(xmin, xmax)
    right = tf.maximum(xmin, xmax)
    top = tf.minimum(ymin, ymax)
    bottom = tf.maximum(ymin, ymax)

    # Stack puts the coordinates on the first axis; transpose restores one
    # box per row.
    bboxes = tf.transpose(tf.stack([top, left, bottom, right]))

    xs, ys = tf_rotate_point_by_90(xs, ys, k)
    return bboxes, xs, ys
Example #3
Source File: odes.py    From deep_image_model with Apache License 2.0 6 votes vote down vote up
def _optimal_step_size(last_step,
                       error_ratio,
                       safety=0.9,
                       ifactor=10.0,
                       dfactor=0.2,
                       order=5,
                       name=None):
  """Compute the size of the next Runge-Kutta step from the last one.

  The result is `last_step / factor`, where `factor` is
  `error_ratio ** (1 / order) / safety` limited to the range
  `[1 / ifactor, 1 / dfactor]`.
  """
  scope_values = [last_step, error_ratio]
  with ops.name_scope(name, 'optimal_step_size', scope_values) as scope:
    error_ratio = math_ops.cast(error_ratio, last_step.dtype)
    exponent = math_ops.cast(1 / order, last_step.dtype)
    # Written as a divisor of last_step (instead of a multiplier) so that
    # error_ratio stays in a numerator and a zero value cannot cause a
    # division by zero.
    lower_limit = 1 / ifactor
    scaled_error = error_ratio ** exponent / safety
    upper_limit = 1 / dfactor
    factor = math_ops.maximum(
        lower_limit, math_ops.minimum(scaled_error, upper_limit))
    return math_ops.div(last_step, factor, name=scope)
Example #4
Source File: layers.py    From tensornets with MIT License 6 votes vote down vote up
def _lower_bound(inputs, bound, name=None):
    """Element-wise `tf.maximum(inputs, bound)` with an overridden gradient.

    The 'Maximum' op is created while its gradient is mapped to the
    'GDNLowerBound' gradient function. Per the original documentation, that
    gradient passes through wherever the input is not hitting the bound;
    where it is, only gradients pushing `inputs` above the bound pass
    through, and nothing flows to `bound`.

    Args:
      inputs: input tensor
      bound: lower bound for the input tensor
      name: name for this op

    Returns:
      tf.maximum(inputs, bound)
    """
    with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope:
        inputs = ops.convert_to_tensor(inputs, name='inputs')
        bound = ops.convert_to_tensor(bound, name='bound')
        graph = ops.get_default_graph()
        gradient_overrides = {'Maximum': 'GDNLowerBound'}
        with graph.gradient_override_map(gradient_overrides):
            bounded = math_ops.maximum(inputs, bound, name=scope)
    return bounded
Example #5
Source File: math_grad.py    From deep_image_model with Apache License 2.0 6 votes vote down vote up
def _SegmentMinOrMaxGrad(op, grad):
  """Shared gradient for SegmentMin and SegmentMax.

  Each segment's incoming gradient is split equally among the input entries
  equal to that segment's output; every other entry receives zero. The
  second input (segment ids) gets no gradient.
  """
  data = op.inputs[0]
  segment_ids = op.inputs[1]
  zero_grads = array_ops.zeros(array_ops.shape(data), dtype=data.dtype)

  # Mark the entries that equal the min/max of their own segment and count
  # how many such entries each segment has.
  output_per_entry = array_ops.gather(op.outputs[0], segment_ids)
  is_selected = math_ops.equal(data, output_per_entry)
  selected_as_number = math_ops.cast(is_selected, grad.dtype)
  num_selected = math_ops.segment_sum(selected_as_number, segment_ids)

  # Share each segment's gradient evenly between its selected entries, then
  # broadcast the per-segment share back to the entries.
  grad_share = math_ops.div(grad, num_selected)
  share_per_entry = array_ops.gather(grad_share, segment_ids)

  return math_ops.select(is_selected, share_per_entry, zero_grads), None
Example #6
Source File: math_grad.py    From deep_image_model with Apache License 2.0 6 votes vote down vote up
def _MinOrMaxGrad(op, grad):
  """Shared gradient for the Min and Max reductions.

  The gradient is routed to the input entries equal to the reduced value.
  When several entries tie, the gradient is divided evenly between them.
  The second input (reduction indices) gets no gradient.
  """
  data = op.inputs[0]
  reduction_indices = op.inputs[1]
  kept_dims_shape = math_ops.reduced_shape(
      array_ops.shape(data), reduction_indices)

  # Reshape the output and its gradient so they broadcast against the input.
  y = array_ops.reshape(op.outputs[0], kept_dims_shape)
  grad = array_ops.reshape(grad, kept_dims_shape)

  # 1 where the input equals the reduced min/max, 0 elsewhere.
  indicators = math_ops.cast(math_ops.equal(y, data), grad.dtype)
  # Number of tied entries along the reduced dimensions.
  tie_count = math_ops.reduce_sum(indicators, reduction_indices)
  num_selected = array_ops.reshape(tie_count, kept_dims_shape)

  return [math_ops.div(indicators, num_selected) * grad, None]
Example #7
Source File: opt.py    From EMNLP2018_NLI with GNU General Public License v3.0 6 votes vote down vote up
def _apply_dense(self, grad, var):
    """Build the update ops for a dense gradient.

    Slot "v" holds an exponential moving average of `grad` (decay beta1).
    Slot "m" holds the element-wise maximum of `beta2 * m + eps` and
    `|grad|`. The variable is decreased by `lr * v_t / m_t`.

    Returns:
      A grouped op running the variable update and both slot updates.
    """
    dtype = var.dtype.base_dtype
    lr_t = math_ops.cast(self._lr_t, dtype)
    beta1_t = math_ops.cast(self._beta1_t, dtype)
    beta2_t = math_ops.cast(self._beta2_t, dtype)
    # 1e-8 underflows in float16, so a larger epsilon is used there -- not
    # sure if it makes a big difference.
    eps = 1e-7 if dtype == tf.float16 else 1e-8

    v = self.get_slot(var, "v")
    averaged_grad = beta1_t * v + (1. - beta1_t) * grad
    v_t = v.assign(averaged_grad)

    m = self.get_slot(var, "m")
    running_max = tf.maximum(beta2_t * m + eps, tf.abs(grad))
    m_t = m.assign(running_max)

    g_t = v_t / m_t
    var_update = state_ops.assign_sub(var, lr_t * g_t)
    return control_flow_ops.group(var_update, m_t, v_t)
Example #8
Source File: spectral_ops.py    From lambda-packs with MIT License 6 votes vote down vote up
def _infer_fft_length_for_irfft(input_tensor, fft_rank):
  """Infers the `fft_length` argument for a `rank` IRFFT from `input_tensor`.

  The inner `fft_rank` dimensions are taken from the input; the last of them
  is replaced by `max(0, 2 * (last - 1))`. A constant is produced when those
  dimensions are statically known, otherwise the result is computed from the
  dynamic shape.
  """
  static_dims = input_tensor.get_shape()[-fft_rank:]

  if static_dims.is_fully_defined():
    # Everything is known at graph-construction time: emit a constant.
    lengths = static_dims.as_list()
    if lengths:
      lengths[-1] = max(0, 2 * (lengths[-1] - 1))
    return _ops.convert_to_tensor(lengths, _dtypes.int32)

  # At least one dimension is unknown: do the same arithmetic on tensors.
  dynamic_dims = _array_ops.shape(input_tensor)[-fft_rank:]
  lengths = _array_ops.unstack(dynamic_dims)
  lengths[-1] = _math_ops.maximum(0, 2 * (lengths[-1] - 1))
  return _array_ops.stack(lengths)
Example #9
Source File: math_grad.py    From lambda-packs with MIT License 6 votes vote down vote up
def _MinOrMaxGrad(op, grad):
  """Gradient used for both the Min and the Max reduction.

  Entries of the input that equal the reduced result receive the gradient,
  shared equally when there is more than one such entry. No gradient is
  returned for the reduction indices.
  """
  values, axes = op.inputs[0], op.inputs[1]
  input_shape = array_ops.shape(values)
  shape_with_kept_dims = math_ops.reduced_shape(input_shape, axes)

  # Bring the reduced output and the upstream gradient back to a shape that
  # broadcasts against the original input.
  reduced = array_ops.reshape(op.outputs[0], shape_with_kept_dims)
  grad = array_ops.reshape(grad, shape_with_kept_dims)

  # Indicator of the entries that attain the min/max, and how many of them
  # there are per reduction slice.
  hits = math_ops.cast(math_ops.equal(reduced, values), grad.dtype)
  hit_count = array_ops.reshape(
      math_ops.reduce_sum(hits, axes), shape_with_kept_dims)

  input_grad = math_ops.div(hits, hit_count) * grad
  return [input_grad, None]
Example #10
Source File: math_grad.py    From lambda-packs with MIT License 6 votes vote down vote up
def _SegmentMinOrMaxGrad(op, grad, is_sorted):
  """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code.

  Each segment's gradient is split evenly between the entries equal to the
  segment's output. `is_sorted` selects between `segment_sum` and
  `unsorted_segment_sum` for counting, and decides how many `None`
  placeholders follow the data gradient (one for the sorted op, two for the
  unsorted op, which has a third input).
  """
  data = op.inputs[0]
  segment_ids = op.inputs[1]
  zeros = array_ops.zeros(array_ops.shape(data), dtype=data.dtype)

  # Flag the entries equal to the min/max of the segment they belong to.
  gathered_outputs = array_ops.gather(op.outputs[0], segment_ids)
  is_selected = math_ops.equal(data, gathered_outputs)
  selected_as_number = math_ops.cast(is_selected, grad.dtype)

  # Count the flagged entries per segment.
  if is_sorted:
    num_selected = math_ops.segment_sum(selected_as_number, segment_ids)
  else:
    num_selected = math_ops.unsorted_segment_sum(
        selected_as_number, segment_ids, op.inputs[2])

  # Even share of the gradient for every flagged entry of a segment.
  weighted_grads = math_ops.div(grad, num_selected)
  gathered_grads = array_ops.gather(weighted_grads, segment_ids)
  data_grad = array_ops.where(is_selected, gathered_grads, zeros)

  if is_sorted:
    return data_grad, None
  return data_grad, None, None
Example #11
Source File: backend.py    From lambda-packs with MIT License 6 votes vote down vote up
def max(x, axis=None, keepdims=False):
  """Maximum value in a tensor.

  Arguments:
      x: A tensor or variable.
      axis: An integer, the axis to find maximum values. It is normalized
          with `_normalize_axis` against the rank of `x` before use.
      keepdims: A boolean, whether to keep the dimensions or not.
          If `keepdims` is `False`, the rank of the tensor is reduced
          by 1. If `keepdims` is `True`,
          the reduced dimension is retained with length 1.

  Returns:
      A tensor with maximum values of `x`.
  """
  rank = ndim(x)
  reduction_axes = _normalize_axis(axis, rank)
  return math_ops.reduce_max(
      x, reduction_indices=reduction_axes, keep_dims=keepdims)
Example #12
Source File: metric_loss_ops.py    From cluster-loss-tensorflow with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def masked_maximum(data, mask, dim=1):
  """Computes the axis wise maximum over chosen elements.

  The data is shifted by its per-axis minimum before the mask is applied, so
  masked-out entries (which become 0 after the multiply) cannot exceed the
  chosen ones; the minimum is added back afterwards.

  Args:
    data: 2-D float `Tensor` of size [n, m].
    mask: 2-D mask `Tensor` of size [n, m]. It is multiplied with the
      shifted data, so it must be usable as a factor of `data`.
    dim: The dimension over which to compute the maximum.

  Returns:
    masked_maximums: N-D `Tensor`.
      The maximized dimension is of size 1 after the operation.
  """
  axis_minimums = math_ops.reduce_min(data, dim, keep_dims=True)
  shifted = data - axis_minimums
  masked_shifted = math_ops.multiply(shifted, mask)
  shifted_maximums = math_ops.reduce_max(masked_shifted, dim, keep_dims=True)
  return shifted_maximums + axis_minimums
Example #13
Source File: odes.py    From lambda-packs with MIT License 6 votes vote down vote up
def _optimal_step_size(last_step,
                       error_ratio,
                       safety=0.9,
                       ifactor=10.0,
                       dfactor=0.2,
                       order=5,
                       name=None):
  """Pick the next Runge-Kutta step size given the last step's error ratio.

  Returns `last_step` divided by a shrink factor. The factor is
  `error_ratio ** (1 / order) / safety`, capped at `1 / dfactor` and floored
  at `1 / ifactor`.
  """
  with ops.name_scope(
      name, 'optimal_step_size', [last_step, error_ratio]) as scope:
    dtype = last_step.dtype
    error_ratio = math_ops.cast(error_ratio, dtype)
    exponent = math_ops.cast(1 / order, dtype)
    # Dividing by a factor that has error_ratio in its numerator (rather
    # than multiplying by its inverse) means error_ratio == 0 is harmless.
    floor = 1 / ifactor
    shrink = error_ratio ** exponent / safety
    shrink = math_ops.minimum(shrink, 1 / dfactor)
    factor = math_ops.maximum(floor, shrink)
    return math_ops.div(last_step, factor, name=scope)
Example #14
Source File: metric_learning.py    From tf-slim with Apache License 2.0 6 votes vote down vote up
def masked_maximum(data, mask, dim=1):
  """Computes the axis wise maximum over chosen elements.

  Works by subtracting the per-axis minimum, multiplying by the mask (so
  unchosen entries become 0, the smallest shifted value), reducing with max
  and finally adding the minimum back.

  Args:
    data: 2-D float `Tensor` of size [n, m].
    mask: 2-D mask `Tensor` of size [n, m]. It is used as a multiplicative
      factor on the shifted data.
    dim: The dimension over which to compute the maximum.

  Returns:
    masked_maximums: N-D `Tensor`.
      The maximized dimension is of size 1 after the operation.
  """
  axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
  chosen_above_min = math_ops.multiply(data - axis_minimums, mask)
  max_above_min = math_ops.reduce_max(chosen_above_min, dim, keepdims=True)
  masked_maximums = max_above_min + axis_minimums
  return masked_maximums
Example #15
Source File: layers.py    From tf-slim with Apache License 2.0 6 votes vote down vote up
def _lower_bound(inputs, bound, name=None):
    """Same as tf.maximum, but with helpful gradient for inputs < bound.

    The 'Maximum' op is built inside a gradient override that replaces its
    gradient with the one registered as 'GDNLowerBound'. As documented by
    the original author: the gradient passes through when the input is not
    hitting the bound; when it is, only gradients that push `inputs` higher
    than the bound pass through. The bound itself receives no gradient.

    Args:
      inputs: input tensor
      bound: lower bound for the input tensor
      name: name for this op

    Returns:
      tf.maximum(inputs, bound)
    """
    scope_values = [inputs, bound]
    with ops.name_scope(name, 'GDNLowerBound', scope_values) as scope:
        inputs = ops.convert_to_tensor(inputs, name='inputs')
        bound = ops.convert_to_tensor(bound, name='bound')
        override_map = ops.get_default_graph().gradient_override_map(
            {'Maximum': 'GDNLowerBound'})
        with override_map:
            return math_ops.maximum(inputs, bound, name=scope)
Example #16
Source File: math_grad.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def _MinOrMaxGrad(op, grad):
  """Gradient shared by the Min and Max reduction ops.

  The upstream gradient goes to the positions whose value equals the
  reduction result, divided by the number of such positions so ties share
  it. `None` is returned for the reduction-indices input.
  """
  inp = op.inputs[0]
  reduce_dims = op.inputs[1]
  output_shape_kept_dims = math_ops.reduced_shape(
      array_ops.shape(inp), reduce_dims)

  # Output and gradient with the reduced dimensions kept as size 1.
  out = array_ops.reshape(op.outputs[0], output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)

  # Positions that attain the extreme value, as numbers in grad's dtype.
  at_extreme = math_ops.cast(math_ops.equal(out, inp), grad.dtype)
  # How many positions tie for the extreme value in each reduction slice.
  ties = array_ops.reshape(
      math_ops.reduce_sum(at_extreme, reduce_dims), output_shape_kept_dims)

  return [math_ops.div(at_extreme, ties) * grad, None]
Example #17
Source File: math_grad.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def _SegmentMinOrMaxGrad(op, grad):
  """Gradient for SegmentMin and SegmentMax. Both share the same code.

  For every segment, the gradient is divided evenly among the data entries
  equal to the segment's output; the remaining entries get zero. The segment
  ids input receives `None`.
  """
  data, segment_ids = op.inputs[0], op.inputs[1]
  no_grad = array_ops.zeros(array_ops.shape(data), dtype=data.dtype)

  # Which entries are the min/max of their segment, and how many per segment.
  segment_result = array_ops.gather(op.outputs[0], segment_ids)
  is_selected = math_ops.equal(data, segment_result)
  selected_count = math_ops.segment_sum(
      math_ops.cast(is_selected, grad.dtype), segment_ids)

  # Per-segment share of the gradient, expanded back to one value per entry.
  per_segment_share = math_ops.div(grad, selected_count)
  per_entry_share = array_ops.gather(per_segment_share, segment_ids)

  data_grad = array_ops.where(is_selected, per_entry_share, no_grad)
  return data_grad, None
Example #18
Source File: factorization_ops.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def _get_sharding_func(size, num_shards):
    """Create sharding function for scatter update.

    The returned callable maps `ids` to `(assignments, new_ids)`: the shard
    each id belongs to and its index inside that shard. The first
    `size % num_shards` shards hold one extra id. With a single shard the
    ids are returned unchanged and the assignment is `None`.
    """

    def func(ids):
        # Nothing to split when there is only one shard.
        if num_shards == 1:
            return None, ids

        ids_per_shard = size // num_shards
        extras = size % num_shards
        # Shard index if the id lies in one of the larger shards ...
        shard_if_large = ids // (ids_per_shard + 1)
        # ... and if it lies in one of the regular-sized shards.
        shard_if_regular = (ids - extras) // ids_per_shard
        assignments = math_ops.maximum(shard_if_large, shard_if_regular)
        new_ids = array_ops.where(assignments < extras,
                                  ids % (ids_per_shard + 1),
                                  (ids - extras) % ids_per_shard)
        return assignments, new_ids

    return func
Example #19
Source File: core_test.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def setUp(self):
    """Prepare the binary ops under test and two float32 labeled tensors.

    `self.ops` holds `(name, None, math_ops function, core function)` tuples.
    The test tensors are `original_lt` cast to float32 and divided by the
    product of its axis sizes, and one minus that tensor.
    """
    super(FloatBinaryOpsTest, self).setUp()

    op_names = [
        'igamma',
        'igammac',
        'zeta',
        'polygamma',
        'maximum',
        'minimum',
        'squared_difference',
    ]
    # Every op is looked up under the same name in both modules.
    self.ops = [(op_name, None, getattr(math_ops, op_name),
                 getattr(core, op_name)) for op_name in op_names]

    axis_sizes = [axis.size for axis in self.original_lt.axes.values()]
    total_size = np.prod(axis_sizes)
    scaled = math_ops.cast(self.original_lt, dtypes.float32) / total_size
    test_lt = core.LabeledTensor(scaled, self.original_lt.axes)

    self.test_lt_1 = test_lt
    self.test_lt_2 = 1.0 - test_lt
    self.test_lt_1_broadcast = self.test_lt_1.tensor
    self.test_lt_2_broadcast = self.test_lt_2.tensor
    self.broadcast_axes = self.test_lt_1.axes
Example #20
Source File: opt.py    From EMNLP2018_NLI with GNU General Public License v3.0 5 votes vote down vote up
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    """Build the AMSGrad update ops for a sparse gradient.

    Both moment slots are first decayed in full, then the gradient
    contribution is added at `indices` through `scatter_add`. `vhat` tracks
    the element-wise maximum of the second moment, and the variable is
    decreased by `lr * m_t / (sqrt(vhat_t) + epsilon)`.

    Returns:
      A grouped op running the variable update and all slot updates.
    """
    dtype = var.dtype.base_dtype
    beta1_power = math_ops.cast(self._beta1_power, dtype)
    beta2_power = math_ops.cast(self._beta2_power, dtype)
    lr_t = math_ops.cast(self._lr_t, dtype)
    beta1_t = math_ops.cast(self._beta1_t, dtype)
    beta2_t = math_ops.cast(self._beta2_t, dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, dtype)

    # Learning rate including the bias-correction terms.
    step_size = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    first_moment_delta = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    # The scatter must run after the decay has been written.
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, first_moment_delta)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    second_moment_delta = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, second_moment_delta)

    # amsgrad: keep the largest second moment seen so far.
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    denominator_root = math_ops.sqrt(vhat_t)
    var_update = state_ops.assign_sub(
        var,
        step_size * m_t / (denominator_root + epsilon_t),
        use_locking=self._use_locking)
    return control_flow_ops.group(var_update, m_t, v_t, vhat_t)
Example #21
Source File: opt.py    From EMNLP2018_NLI with GNU General Public License v3.0 5 votes vote down vote up
def _apply_dense(self, grad, var):
    """Build the AMSGrad update ops for a dense gradient.

    The "m" and "v" slots are updated as exponential moving averages of the
    gradient and the squared gradient. "vhat" keeps the element-wise maximum
    of `v_t` and its previous value. The variable is decreased by
    `lr * m_t / (sqrt(vhat_t) + epsilon)`.

    Returns:
      A grouped op running the variable update and all slot updates.
    """
    dtype = var.dtype.base_dtype
    beta1_power = math_ops.cast(self._beta1_power, dtype)
    beta2_power = math_ops.cast(self._beta2_power, dtype)
    lr_t = math_ops.cast(self._lr_t, dtype)
    beta1_t = math_ops.cast(self._beta1_t, dtype)
    beta2_t = math_ops.cast(self._beta2_t, dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, dtype)

    # Learning rate including the bias-correction terms.
    step_size = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    first_moment_delta = grad * (1 - beta1_t)
    m_t = state_ops.assign(
        m, beta1_t * m + first_moment_delta, use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    second_moment_delta = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(
        v, beta2_t * v + second_moment_delta, use_locking=self._use_locking)

    # amsgrad: keep the largest second moment seen so far.
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    denominator_root = math_ops.sqrt(vhat_t)

    var_update = state_ops.assign_sub(
        var,
        step_size * m_t / (denominator_root + epsilon_t),
        use_locking=self._use_locking)
    return control_flow_ops.group(var_update, m_t, v_t, vhat_t)
Example #22
Source File: RankLSTM_model.py    From Deep-Listwise-Context-Model-for-Ranking-Refinement with Apache License 2.0 5 votes vote down vote up
def clip_by_each_value(self, t_list, clip_max_value=None,
                       clip_min_value=None, name=None):
    """Clip every tensor in `t_list` element-wise to the given bounds.

    Args:
      t_list: A sequence of tensors, `IndexedSlices`, or `None` entries.
      clip_max_value: Upper bound applied with `math_ops.minimum`; skipped
        when `None`.
      clip_min_value: Lower bound applied with `math_ops.maximum`; skipped
        when `None`.
      name: Optional name for the enclosing name scope.

    Returns:
      A list with one entry per element of `t_list`. `None` entries stay
      `None`; `IndexedSlices` are rebuilt around their clipped values with
      the original indices and dense shape.

    Raises:
      TypeError: If `t_list` is not a sequence, or is a string.
    """
    # `collections.Sequence` was removed in Python 3.10; fall back to the old
    # location only where `collections.abc` does not exist (Python 2).
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if (not isinstance(t_list, Sequence)
            or isinstance(t_list, six.string_types)):
        raise TypeError("t_list should be a sequence")
    t_list = list(t_list)

    # Only the bounds that were actually supplied take part in the scope.
    bounds = [b for b in (clip_max_value, clip_min_value) if b is not None]
    with ops.name_scope(name, "clip_by_each_value", t_list + bounds) as name:
        values = [
            ops.convert_to_tensor(
                t.values if isinstance(t, ops.IndexedSlices) else t,
                name="t_%d" % i)
            if t is not None else t
            for i, t in enumerate(t_list)]

        values_clipped = []
        for i, v in enumerate(values):
            if v is None:
                values_clipped.append(None)
                continue
            # Start from the value itself so that either bound (or neither)
            # may be omitted.
            clipped = v
            if clip_max_value is not None:
                clipped = math_ops.minimum(clipped, clip_max_value)
            if clip_min_value is not None:
                clipped = math_ops.maximum(clipped, clip_min_value)
            with ops.colocate_with(clipped):
                values_clipped.append(
                    tf.identity(clipped, name="%s_%d" % (name, i)))

        list_clipped = [
            ops.IndexedSlices(c_v, t.indices, t.dense_shape)
            if isinstance(t, ops.IndexedSlices)
            else c_v
            for (c_v, t) in zip(values_clipped, t_list)]

    return list_clipped
Example #23
Source File: opt.py    From EMNLP2018_NLI with GNU General Public License v3.0 5 votes vote down vote up
def _resource_apply_dense(self, grad, var):
    """Build the AMSGrad update ops for a resource variable.

    Mirrors the dense update: moving averages for "m" and "v", a running
    element-wise maximum in "vhat", and a step of
    `lr * m_t / (sqrt(vhat_t) + epsilon)`. All hyper-parameters are cast to
    the gradient's base dtype.

    Returns:
      A grouped op running the variable update and all slot updates.
    """
    # NOTE(review): from here on `var` and the slots are handles, and they
    # are passed to get_slot / state_ops.assign as such -- confirm this is
    # supported by the TensorFlow version in use.
    var = var.handle
    dtype = grad.dtype.base_dtype
    beta1_power = math_ops.cast(self._beta1_power, dtype)
    beta2_power = math_ops.cast(self._beta2_power, dtype)
    lr_t = math_ops.cast(self._lr_t, dtype)
    beta1_t = math_ops.cast(self._beta1_t, dtype)
    beta2_t = math_ops.cast(self._beta2_t, dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, dtype)

    # Learning rate including the bias-correction terms.
    step_size = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m").handle
    first_moment_delta = grad * (1 - beta1_t)
    m_t = state_ops.assign(
        m, beta1_t * m + first_moment_delta, use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v").handle
    second_moment_delta = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(
        v, beta2_t * v + second_moment_delta, use_locking=self._use_locking)

    # amsgrad: keep the largest second moment seen so far.
    vhat = self.get_slot(var, "vhat").handle
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    denominator_root = math_ops.sqrt(vhat_t)

    var_update = state_ops.assign_sub(
        var,
        step_size * m_t / (denominator_root + epsilon_t),
        use_locking=self._use_locking)
    return control_flow_ops.group(var_update, m_t, v_t, vhat_t)
Example #24
Source File: ctc_model.py    From tf-end-to-end with MIT License 5 votes vote down vote up
def leaky_relu(features, alpha=0.2, name=None):
  """Return the element-wise maximum of `alpha * features` and `features`.

  Args:
    features: input, converted to a tensor.
    alpha: slope factor applied to `features`, converted to a tensor.
    name: optional name for the enclosing name scope.
  """
  with ops.name_scope(name, "LeakyRelu", [features, alpha]):
    features = ops.convert_to_tensor(features, name="features")
    alpha = ops.convert_to_tensor(alpha, name="alpha")
    scaled_features = alpha * features
    return math_ops.maximum(scaled_features, features)



#
# params["height"] = height of the input image
# params["width"] = width of the input image 
Example #25
Source File: AMSGrad.py    From scGAN with MIT License 5 votes vote down vote up
def _resource_apply_dense(self, grad, var):
    """Create the ops performing one AMSGrad step on a resource variable.

    "m" and "v" are exponential moving averages of the gradient and of its
    square; "vhat" is the element-wise maximum of the new `v` and the old
    "vhat". The variable moves by `-lr * m_t / (sqrt(vhat_t) + epsilon)`.

    Returns:
      One op grouping the variable update with the three slot updates.
    """
    # NOTE(review): the variable and its slots are replaced by their handles
    # before being used in arithmetic and assign ops -- verify against the
    # targeted TensorFlow version.
    var = var.handle
    grad_dtype = grad.dtype.base_dtype
    beta1_power = math_ops.cast(self._beta1_power, grad_dtype)
    beta2_power = math_ops.cast(self._beta2_power, grad_dtype)
    lr_t = math_ops.cast(self._lr_t, grad_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad_dtype)

    # Bias-corrected learning rate.
    corrected_lr = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m").handle
    grad_part = grad * (1 - beta1_t)
    new_m = beta1_t * m + grad_part
    m_t = state_ops.assign(m, new_m, use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v").handle
    squared_grad_part = (grad * grad) * (1 - beta2_t)
    new_v = beta2_t * v + squared_grad_part
    v_t = state_ops.assign(v, new_v, use_locking=self._use_locking)

    # amsgrad: vhat never decreases.
    vhat = self.get_slot(var, "vhat").handle
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)

    step = corrected_lr * m_t / (v_sqrt + epsilon_t)
    var_update = state_ops.assign_sub(
        var, step, use_locking=self._use_locking)
    return control_flow_ops.group(var_update, m_t, v_t, vhat_t)
Example #26
Source File: interpolate_spline.py    From WarpGAN with MIT License 5 votes vote down vote up
def _phi(r, order):
  """Coordinate-wise nonlinearity used to define the order of the interpolation.

  See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition.

  Args:
    r: input op
    order: interpolation order

  Returns:
    phi_k evaluated coordinate-wise on r, for k = order
  """

  # Clamping with EPSILON prevents log(0), sqrt(0), etc.
  # sqrt(0) is well-defined, but its gradient is not.
  with ops.name_scope('phi'):
    if order == 1:
      return math_ops.sqrt(math_ops.maximum(r, EPSILON))
    if order == 2:
      return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON))
    if order == 4:
      return 0.5 * math_ops.square(r) * math_ops.log(
          math_ops.maximum(r, EPSILON))

    # General case: both remaining branches work on the clamped input
    # raised to order / 2; even orders additionally carry a log factor.
    clamped = math_ops.maximum(r, EPSILON)
    powered = math_ops.pow(clamped, 0.5 * order)
    if order % 2 == 0:
      return 0.5 * powered * math_ops.log(clamped)
    return powered
Example #27
Source File: opt.py    From EMNLP2018_NLI with GNU General Public License v3.0 5 votes vote down vote up
def _resource_apply_dense(self, grad, var):
    """Assemble the AMSGrad update for a resource variable.

    Updates the "m" and "v" moving averages, raises "vhat" to the
    element-wise maximum of itself and the new `v`, and subtracts
    `lr * m_t / (sqrt(vhat_t) + epsilon)` from the variable.

    Returns:
      A grouped op covering the variable update and every slot update.
    """
    # NOTE(review): handles are used in place of the variable and slots for
    # both lookups and assignments -- confirm this works as intended.
    var = var.handle
    base_dtype = grad.dtype.base_dtype
    beta1_power = math_ops.cast(self._beta1_power, base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, base_dtype)
    lr_t = math_ops.cast(self._lr_t, base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, base_dtype)

    # Learning rate with the bias correction applied.
    lr = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m").handle
    m_increment = grad * (1 - beta1_t)
    m_t = state_ops.assign(
        m, beta1_t * m + m_increment, use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v").handle
    v_increment = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(
        v, beta2_t * v + v_increment, use_locking=self._use_locking)

    # amsgrad: running maximum of the second moment.
    vhat = self.get_slot(var, "vhat").handle
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    vhat_root = math_ops.sqrt(vhat_t)

    var_update = state_ops.assign_sub(
        var, lr * m_t / (vhat_root + epsilon_t),
        use_locking=self._use_locking)
    return control_flow_ops.group(var_update, m_t, v_t, vhat_t)
Example #28
Source File: metric_learning.py    From tf-slim with Apache License 2.0 5 votes vote down vote up
def contrastive_loss(labels, embeddings_anchor, embeddings_positive,
                     margin=1.0):
  """Computes the contrastive loss.

  This loss encourages the embedding to be close to each other for
    the samples of the same label and the embedding to be far apart at least
    by the margin constant for the samples of different labels.
  See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      binary labels indicating positive vs negative pair.
    embeddings_anchor: 2-D float `Tensor` of embedding vectors for the anchor
      images. Embeddings should be l2 normalized.
    embeddings_positive: 2-D float `Tensor` of embedding vectors for the
      positive images. Embeddings should be l2 normalized.
    margin: margin term in the loss definition.

  Returns:
    contrastive_loss: tf.float32 scalar.
  """
  # Euclidean distance of every (anchor, positive) pair.
  squared_diffs = math_ops.squared_difference(
      embeddings_anchor, embeddings_positive)
  distances = math_ops.sqrt(math_ops.reduce_sum(squared_diffs, 1))

  # Labels are {0, 1} for negative / positive pairs. Positive pairs are
  # penalised by their squared distance, negative pairs by the squared
  # amount they fall short of the margin.
  positive_term = (
      math_ops.cast(labels, distances.dtype) * math_ops.square(distances))
  negative_weight = 1. - math_ops.cast(labels, distances.dtype)
  margin_shortfall = math_ops.maximum(margin - distances, 0.)
  negative_term = negative_weight * math_ops.square(margin_shortfall)

  return math_ops.reduce_mean(
      positive_term + negative_term, name='contrastive_loss')
Example #29
Source File: metric_learning.py    From tf-slim with Apache License 2.0 5 votes vote down vote up
def _compute_vmeasure_score(labels, predictions):
  """Return `metrics.v_measure_score(labels, predictions)` as float32, >= 0.

  The score is computed through `py_func` as float64, cast to float32 and
  clamped from below at 0.0.
  """
  raw_score = script_ops.py_func(
      metrics.v_measure_score, [labels, predictions], [dtypes.float64],
      name='vmeasure')
  vmeasure_score = math_ops.cast(raw_score, dtypes.float32)
  return math_ops.maximum(0.0, vmeasure_score)
Example #30
Source File: opt.py    From EMNLP2018_NLI with GNU General Public License v3.0 5 votes vote down vote up
def _apply_dense(self, grad, var):
    """Create the ops performing one AMSGrad step for a dense gradient.

    "m" and "v" become exponential moving averages of the gradient and its
    square. "vhat" is raised to the element-wise maximum of itself and the
    new `v`. The variable moves by `-lr * m_t / (sqrt(vhat_t) + epsilon)`.

    Returns:
      One op grouping the variable update with the three slot updates.
    """
    var_dtype = var.dtype.base_dtype
    beta1_power = math_ops.cast(self._beta1_power, var_dtype)
    beta2_power = math_ops.cast(self._beta2_power, var_dtype)
    lr_t = math_ops.cast(self._lr_t, var_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var_dtype)

    # Bias-corrected learning rate.
    corrected_lr = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    grad_part = grad * (1 - beta1_t)
    new_m = beta1_t * m + grad_part
    m_t = state_ops.assign(m, new_m, use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    squared_grad_part = (grad * grad) * (1 - beta2_t)
    new_v = beta2_t * v + squared_grad_part
    v_t = state_ops.assign(v, new_v, use_locking=self._use_locking)

    # amsgrad: vhat never decreases.
    vhat = self.get_slot(var, "vhat")
    vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
    v_sqrt = math_ops.sqrt(vhat_t)

    step = corrected_lr * m_t / (v_sqrt + epsilon_t)
    var_update = state_ops.assign_sub(
        var, step, use_locking=self._use_locking)
    return control_flow_ops.group(var_update, m_t, v_t, vhat_t)