Python tensorflow.python.eager.context.executing_eagerly() Examples

The following are 30 code examples of tensorflow.python.eager.context.executing_eagerly(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.eager.context , or try the search function .
Example #1
Source File: layers.py    From tensornets with MIT License 6 votes vote down vote up
def softmax(logits, scope=None):
  """Performs softmax on Nth dimension of N-dimensional logit tensor.

  For two-dimensional logits this reduces to tf.nn.softmax. The N-th dimension
  needs to have a specified number of elements (number of classes).

  Args:
    logits: N-dimensional `Tensor` with logits, where N > 1.
    scope: Optional scope for variable_scope.

  Returns:
    A `Tensor` with same shape and type as logits.
  """
  # TODO(jrru): Add axis argument which defaults to last dimension.
  with variable_scope.variable_scope(scope, 'softmax', [logits]):
    num_logits = utils.last_dimension(logits.get_shape(), min_rank=2)
    logits_2d = array_ops.reshape(logits, [-1, num_logits])
    predictions = nn.softmax(logits_2d)
    predictions = array_ops.reshape(predictions, array_ops.shape(logits))
    if not context.executing_eagerly():
      predictions.set_shape(logits.get_shape())
    return predictions 
Example #2
Source File: spectral_norm_dense.py    From tf2rl with MIT License 6 votes vote down vote up
def call(self, inputs):
        w = self.compute_spectral_norm()
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        rank = common_shapes.rank(inputs)
        if rank > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, w, [[rank - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                shape = inputs.get_shape().as_list()
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            outputs = gen_math_ops.mat_mul(inputs, w)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs 
Example #3
Source File: temporal_convolutional_network.py    From nlp-architect with Apache License 2.0 6 votes vote down vote up
def __init__(self, layer, data_init=False, **kwargs):
        if not isinstance(layer, Layer):
            raise ValueError(
                "Please initialize `WeightNorm` layer with a "
                "`Layer` instance. You passed: {input}".format(input=layer)
            )

        if not context.executing_eagerly() and data_init:
            raise NotImplementedError(
                "Data dependent variable initialization is not available for " "graph execution"
            )

        self.initialized = True
        if data_init:
            self.initialized = False

        self.layer_depth = None
        self.norm_axes = None
        super(WeightNorm, self).__init__(layer, **kwargs)
        self._track_trackable(layer, name="layer") 
Example #4
Source File: pad_along_dimension_op_test.py    From text with Apache License 2.0 6 votes vote down vote up
def testRaggedPadDimensionErrors(self):
    ragged_data = ragged_factory_ops.constant([[1, 2], [3, 4]])
    self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        'axis must be between -k <= axis <= -1 OR 0 <= axis < k',
        pad_along_dimension_op.pad_along_dimension,
        ragged_data,
        left_pad=[0],
        axis=2)
    self.assertRaisesRegexp(
        ValueError,
        r'Shapes .* are incompatible',
        pad_along_dimension_op.pad_along_dimension,
        ragged_data,
        axis=1,
        left_pad=ragged_data)
    if not context.executing_eagerly():
      self.assertRaisesRegexp(
          ValueError, 'axis may not be negative if data is ragged '
          'and data.ndims is not statically known.',
          pad_along_dimension_op.pad_along_dimension,
          ragged_tensor.RaggedTensor.from_tensor(
              array_ops.placeholder_with_default([[1, 2], [3, 4]], shape=None)),
          left_pad=[0],
          axis=-1) 
Example #5
Source File: AdaBound.py    From AdaBound-Tensorflow with Apache License 2.0 6 votes vote down vote up
def _finish(self, update_ops, name_scope):
        # Update the power accumulators.
        with ops.control_dependencies(update_ops):
            graph = None if context.executing_eagerly() else ops.get_default_graph()
            beta1_power = self._get_non_slot_variable("beta1_power", graph=graph)
            beta2_power = self._get_non_slot_variable("beta2_power", graph=graph)
            gamma_multi = self._get_non_slot_variable("gamma_multi", graph=graph)
            with ops.colocate_with(beta1_power):
                update_beta1 = beta1_power.assign(
                    beta1_power * self._beta1_t,
                    use_locking=self._use_locking)
                update_beta2 = beta2_power.assign(
                    beta2_power * self._beta2_t,
                    use_locking=self._use_locking)
                update_gamma = gamma_multi.assign(
                    gamma_multi + self._gamma_t,
                    use_locking=self._use_locking)
        return control_flow_ops.group(*update_ops + [update_beta1, update_beta2, update_gamma],
                                      name=name_scope) 
Example #6
Source File: AdaBound.py    From captcha_trainer with Apache License 2.0 6 votes vote down vote up
def _create_slots(self, var_list):
        first_var = min(var_list, key=lambda x: x.name)
        if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
            graph = None if context.executing_eagerly() else ops.get_default_graph()
        else:
            graph = ops.get_default_graph()
        create_new = self._get_non_slot_variable("beta1_power", graph) is None
        if not create_new and context.in_graph_mode():
            create_new = (self._get_non_slot_variable("beta1_power", graph).graph is not first_var.graph)

        if create_new:
            self._create_non_slot_variable(initial_value=self._beta1,
                                           name="beta1_power",
                                           colocate_with=first_var)
            self._create_non_slot_variable(initial_value=self._beta2,
                                           name="beta2_power",
                                           colocate_with=first_var)
            self._create_non_slot_variable(initial_value=self._gamma,
                                           name="gamma_multi",
                                           colocate_with=first_var)
        # Create slots for the first and second moments.
        for v in var_list :
            self._zeros_slot(v, "m", self._name)
            self._zeros_slot(v, "v", self._name)
            self._zeros_slot(v, "vhat", self._name) 
Example #7
Source File: AdaBound.py    From AdaBound-Tensorflow with Apache License 2.0 6 votes vote down vote up
def _create_slots(self, var_list):
        first_var = min(var_list, key=lambda x: x.name)

        graph = None if context.executing_eagerly() else ops.get_default_graph()
        create_new = self._get_non_slot_variable("beta1_power", graph) is None
        if not create_new and context.in_graph_mode():
            create_new = (self._get_non_slot_variable("beta1_power", graph).graph is not first_var.graph)

        if create_new:
            self._create_non_slot_variable(initial_value=self._beta1,
                                           name="beta1_power",
                                           colocate_with=first_var)
            self._create_non_slot_variable(initial_value=self._beta2,
                                           name="beta2_power",
                                           colocate_with=first_var)
            self._create_non_slot_variable(initial_value=self._gamma,
                                           name="gamma_multi",
                                           colocate_with=first_var)
        # Create slots for the first and second moments.
        for v in var_list :
            self._zeros_slot(v, "m", self._name)
            self._zeros_slot(v, "v", self._name)
            self._zeros_slot(v, "vhat", self._name) 
Example #8
Source File: AdaBound.py    From HyperGAN with MIT License 5 votes vote down vote up
def _create_slots(self, var_list):
        first_var = min(var_list, key=lambda x: x.name)

        graph = None if context.executing_eagerly() else ops.get_default_graph()
        # Create slots for the first and second moments.
        for v in var_list :
            self._zeros_slot(v, "m", self._name)
            self._zeros_slot(v, "v", self._name)
            self._zeros_slot(v, "vhat", self._name) 
Example #9
Source File: temporal_convolutional_network.py    From nlp-architect with Apache License 2.0 5 votes vote down vote up
def call(self, inputs):
        """Call `Layer`"""
        if context.executing_eagerly():
            if not self.initialized:
                self._data_dep_init(inputs)
            self._compute_weights()  # Recompute weights for each forward pass

        output = self.layer.call(inputs)
        return output 
Example #10
Source File: training.py    From keras-radam with MIT License 5 votes vote down vote up
def _get_beta_accumulators(self):
        with ops.init_scope():
            if context.executing_eagerly():
                graph = None
            else:
                graph = ops.get_default_graph()
            return (self._get_non_slot_variable("step", graph=graph),
                    self._get_non_slot_variable("beta1_power", graph=graph),
                    self._get_non_slot_variable("beta2_power", graph=graph)) 
Example #11
Source File: RAdam.py    From RAdam-Tensorflow with MIT License 5 votes vote down vote up
def _get_beta_accumulators(self):
        with ops.init_scope():
            if context.executing_eagerly():
                graph = None
            else:
                graph = ops.get_default_graph()
            return (self._get_non_slot_variable("step", graph=graph),
                    self._get_non_slot_variable("beta1_power", graph=graph),
                    self._get_non_slot_variable("beta2_power", graph=graph)) 
Example #12
Source File: lamb_optimizer_v1.py    From training with Apache License 2.0 5 votes vote down vote up
def _get_beta_accumulators(self):
    with ops.init_scope():
      if context.executing_eagerly():
        graph = None
      else:
        graph = ops.get_default_graph()
      return (self._get_non_slot_variable("beta1_power", graph=graph),
              self._get_non_slot_variable("beta2_power", graph=graph)) 
Example #13
Source File: lamb_optimizer_v1.py    From training with Apache License 2.0 5 votes vote down vote up
def _get_beta_accumulators(self):
    with ops.init_scope():
      if context.executing_eagerly():
        graph = None
      else:
        graph = ops.get_default_graph()
      return (self._get_non_slot_variable("beta1_power", graph=graph),
              self._get_non_slot_variable("beta2_power", graph=graph)) 
Example #14
Source File: lookahead_tensorflow.py    From lookahead with MIT License 5 votes vote down vote up
def _get_la_step_accumulators(self):
        with ops.init_scope():
            if context.executing_eagerly():
                graph = None
            else:
                graph = ops.get_default_graph()
            return self._get_non_slot_variable("la_step", graph=graph) 
Example #15
Source File: gdn.py    From pcc_geo_cnn with MIT License 5 votes vote down vote up
def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    ndim = self._input_rank

    if self.rectify:
      inputs = nn.relu(inputs)

    # Compute normalization pool.
    if ndim == 2:
      norm_pool = math_ops.matmul(math_ops.square(inputs), self.gamma)
      norm_pool = nn.bias_add(norm_pool, self.beta)
    elif self.data_format == "channels_last" and ndim <= 5:
      shape = self.gamma.shape.as_list()
      gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape)
      norm_pool = nn.convolution(math_ops.square(inputs), gamma, "VALID")
      norm_pool = nn.bias_add(norm_pool, self.beta)
    else:  # generic implementation
      # This puts channels in the last dimension regardless of input.
      norm_pool = math_ops.tensordot(
          math_ops.square(inputs), self.gamma, [[self._channel_axis()], [0]])
      norm_pool += self.beta
      if self.data_format == "channels_first":
        # Return to channels_first format if necessary.
        axes = list(range(ndim - 1))
        axes.insert(1, ndim - 1)
        norm_pool = array_ops.transpose(norm_pool, axes)

    if self.inverse:
      norm_pool = math_ops.sqrt(norm_pool)
    else:
      norm_pool = math_ops.rsqrt(norm_pool)
    outputs = inputs * norm_pool

    if not context.executing_eagerly():
      outputs.set_shape(self.compute_output_shape(inputs.shape))
    return outputs 
Example #16
Source File: taware_layer.py    From THRED with MIT License 5 votes vote down vote up
def __op(self, kernel, inputs, shape):
        if len(shape) > 2:
            # Broadcasting is required for the inputs.
            outputs = tf.tensordot(inputs, kernel, [[len(shape) - 1],[0]])
            # Reshape the output back to the original ndim of the input.
            # if context.in_graph_mode():
            # for tf > 1.5.0
            if not context.executing_eagerly():
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            outputs = tf.matmul(inputs, kernel)

        return outputs 
Example #17
Source File: RAdam.py    From captcha_trainer with Apache License 2.0 5 votes vote down vote up
def _get_beta_accumulators(self):
        with ops.init_scope():
            if context.executing_eagerly():
                graph = None
            else:
                graph = ops.get_default_graph()
            return (self._get_non_slot_variable("step", graph=graph),
                    self._get_non_slot_variable("beta1_power", graph=graph),
                    self._get_non_slot_variable("beta2_power", graph=graph)) 
Example #18
Source File: span_overlaps_op_test.py    From text with Apache License 2.0 5 votes vote down vote up
def testErrors(self):
    t = [10, 20, 30, 40, 50]

    with self.assertRaisesRegexp(TypeError, 'contains must be bool.'):
      pointer_ops.span_overlaps(t, t, t, t, contains='x')
    with self.assertRaisesRegexp(TypeError, 'contained_by must be bool.'):
      pointer_ops.span_overlaps(t, t, t, t, contained_by='x')
    with self.assertRaisesRegexp(TypeError, 'partial_overlap must be bool.'):
      pointer_ops.span_overlaps(t, t, t, t, partial_overlap='x')
    with self.assertRaisesRegexp(
        TypeError, 'source_start, source_limit, target_start, and '
        'target_limit must all have the same dtype'):
      pointer_ops.span_overlaps(t, t, t, [1.0, 2.0, 3.0, 4.0, 5.0])
    with self.assertRaisesRegexp(ValueError,
                                 r'Shapes \(5,\) and \(4,\) are incompatible'):
      pointer_ops.span_overlaps(t, t[:4], t, t)
    with self.assertRaisesRegexp(ValueError,
                                 r'Shapes \(4,\) and \(5,\) are incompatible'):
      pointer_ops.span_overlaps(t, t, t[:4], t)
    with self.assertRaisesRegexp(
        ValueError, r'Shapes \(1, 5\) and \(5,\) must have the same rank'):
      pointer_ops.span_overlaps([t], [t], t, t)
    if not context.executing_eagerly():
      with self.assertRaisesRegexp(
          ValueError, 'For ragged inputs, the shape.ndims of at least one '
          'span tensor must be statically known.'):
        x = ragged_tensor.RaggedTensor.from_row_splits(
            array_ops.placeholder(dtypes.int32), [0, 3, 8])
        pointer_ops.span_overlaps(x, x, x, x)
    with self.assertRaisesRegexp(
        ValueError, 'Span tensors must all have the same ragged_rank'):
      a = [[10, 20, 30], [40, 50, 60]]
      pointer_ops.span_overlaps(a, a, a, ragged_factory_ops.constant(a))
    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        'Mismatched ragged shapes for batch dimensions'):
      rt1 = ragged_factory_ops.constant([[[1, 2], [3]], [[4, 5]]])
      rt2 = ragged_factory_ops.constant([[[1, 2], [3]], [[4, 5], [6]]])
      pointer_ops.span_overlaps(rt1, rt1, rt2, rt2) 
Example #19
Source File: qhadam.py    From qhoptim with MIT License 5 votes vote down vote up
def _get_beta_weights(self):
        with ops.init_scope():
            if context.executing_eagerly():
                graph = None
            else:
                graph = ops.get_default_graph()
        return (
            self._get_non_slot_variable("beta1_weight", graph=graph),
            self._get_non_slot_variable("beta2_weight", graph=graph),
        ) 
Example #20
Source File: AdaBound.py    From captcha_trainer with Apache License 2.0 4 votes vote down vote up
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
        if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
            graph = None if context.executing_eagerly() else ops.get_default_graph()
        else:
            graph = ops.get_default_graph()
        beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
        beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
        gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype)

        step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
        final_lr = self._final_lr * lr_t / base_lr_t
        lower_bound = final_lr * (1. - 1. / (gamma_t + 1.))
        upper_bound = final_lr * (1. + 1. / (gamma_t))

        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_t]):
            m_t = scatter_add(m, indices, m_scaled_g_values)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
        with ops.control_dependencies([v_t]):
            v_t = scatter_add(v, indices, v_scaled_g_values)

        # amsgrad
        vhat = self.get_slot(var, "vhat")
        if self._amsbound:
            vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
            v_sqrt = math_ops.sqrt(vhat_t)
        else:
            vhat_t = state_ops.assign(vhat, vhat)
            v_sqrt = math_ops.sqrt(v_t)

        # Compute the bounds
        step_size_bound = step_size / (v_sqrt + epsilon_t)
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

        var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

        return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 
Example #21
Source File: AdaBound.py    From captcha_trainer with Apache License 2.0 4 votes vote down vote up
def _resource_apply_dense(self, grad, var):
        if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
            graph = None if context.executing_eagerly() else ops.get_default_graph()
        else:
            graph = ops.get_default_graph()
        beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), grad.dtype.base_dtype)
        beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), grad.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
        base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
        gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

        step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
        final_lr = self._final_lr * lr_t / base_lr_t
        lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
        upper_bound = final_lr * (1. + 1. / (gamma_multi))

        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking)

        # amsgrad
        vhat = self.get_slot(var, "vhat")
        if self._amsbound:
            vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
            v_sqrt = math_ops.sqrt(vhat_t)
        else:
            vhat_t = state_ops.assign(vhat, vhat)
            v_sqrt = math_ops.sqrt(v_t)

        # Compute the bounds
        step_size_bound = step_size / (v_sqrt + epsilon_t)
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

        var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

        return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 
Example #22
Source File: entropy_models.py    From pcc_geo_cnn with MIT License 4 votes vote down vote up
def compress(self, inputs):
    """Compress inputs and store their binary representations into strings.

    Args:
      inputs: `Tensor` with values to be compressed.

    Returns:
      String `Tensor` vector containing the compressed representation of each
      batch element of `inputs`.
    """
    with ops.name_scope(self._name_scope()):
      inputs = ops.convert_to_tensor(inputs)
      if not self.built:
        # Check input assumptions set before layer building, e.g. input rank.
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
        if self.dtype is None:
          self._dtype = inputs.dtype.base_dtype.name
        self.build(inputs.shape)

      # Check input assumptions set after layer building, e.g. input shape.
      if not context.executing_eagerly():
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)

      ndim = self.input_spec.ndim
      channel_axis = self._channel_axis(ndim)
      # Tuple of slices for expanding dimensions of tensors below.
      slices = ndim * [None] + [slice(None)]
      slices[channel_axis] = slice(None)
      slices = tuple(slices)

      # Expand dimensions of CDF to input dimensions, keeping the channels along
      # the right dimension.
      cdf = self._quantized_cdf[slices[1:]]
      num_levels = array_ops.shape(cdf)[-1] - 1

      # Bring inputs to the right range by centering the range on the medians.
      half = constant_op.constant(.5, dtype=self.dtype)
      medians = array_ops.squeeze(self._medians, [1, 2])
      offsets = (math_ops.cast(num_levels // 2, self.dtype) + half) - medians
      # Expand offsets to input dimensions and add to inputs.
      values = inputs + offsets[slices[:-1]]

      # Clip to range and cast to integers. Because we have added .5 above, and
      # all values are positive, the cast effectively implements rounding.
      values = math_ops.maximum(values, half)
      values = math_ops.minimum(
          values, math_ops.cast(num_levels, self.dtype) - half)
      values = math_ops.cast(values, dtypes.int16)

      def loop_body(tensor):
        return coder_ops.range_encode(
            tensor, cdf, precision=self.range_coder_precision)
      strings = functional_ops.map_fn(
          loop_body, values, dtype=dtypes.string, back_prop=False)

      if not context.executing_eagerly():
        strings.set_shape(inputs.shape[:1])

      return strings 
Example #23
Source File: AdaBound.py    From captcha_trainer with Apache License 2.0 4 votes vote down vote up
def _apply_dense(self, grad, var):
        if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
            graph = None if context.executing_eagerly() else ops.get_default_graph()
        else:
            graph = ops.get_default_graph()
        beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
        beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
        gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

        step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
        final_lr = self._final_lr * lr_t / base_lr_t
        lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
        upper_bound = final_lr * (1. + 1. / (gamma_multi))

        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking)

        # amsgrad
        vhat = self.get_slot(var, "vhat")
        if self._amsbound :
            vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
            v_sqrt = math_ops.sqrt(vhat_t)
        else:
            vhat_t = state_ops.assign(vhat, vhat)
            v_sqrt = math_ops.sqrt(v_t)

        # Compute the bounds
        step_size_bound = step_size / (v_sqrt + epsilon_t)
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

        var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)
        return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 
Example #24
Source File: AdaBound.py    From AdaBound-Tensorflow with Apache License 2.0 4 votes vote down vote up
def _apply_dense(self, grad, var):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
        beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
        beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
        gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

        step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
        final_lr = self._final_lr * lr_t / base_lr_t
        lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
        upper_bound = final_lr * (1. + 1. / (gamma_multi))

        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking)

        # amsgrad
        vhat = self.get_slot(var, "vhat")
        if self._amsbound :
            vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
            v_sqrt = math_ops.sqrt(vhat_t)
        else :
            vhat_t = state_ops.assign(vhat, vhat)
            v_sqrt = math_ops.sqrt(v_t)


        # Compute the bounds
        step_size_bound = step_size / (v_sqrt + epsilon_t)
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

        var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)
        return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 
Example #25
Source File: AdaBound.py    From AdaBound-Tensorflow with Apache License 2.0 4 votes vote down vote up
def _resource_apply_dense(self, grad, var):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
        beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), grad.dtype.base_dtype)
        beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), grad.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
        base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
        gamma_multi = math_ops.cast(self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

        step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
        final_lr = self._final_lr * lr_t / base_lr_t
        lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
        upper_bound = final_lr * (1. + 1. / (gamma_multi))

        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking)

        # amsgrad
        vhat = self.get_slot(var, "vhat")
        if self._amsbound:
            vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
            v_sqrt = math_ops.sqrt(vhat_t)
        else:
            vhat_t = state_ops.assign(vhat, vhat)
            v_sqrt = math_ops.sqrt(v_t)

        # Compute the bounds
        step_size_bound = step_size / (v_sqrt + epsilon_t)
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

        var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

        return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 
Example #26
Source File: AdaBound.py    From AdaBound-Tensorflow with Apache License 2.0 4 votes vote down vote up
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
        beta1_power = math_ops.cast(self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
        beta2_power = math_ops.cast(self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
        gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype)

        step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
        final_lr = self._final_lr * lr_t / base_lr_t
        lower_bound = final_lr * (1. - 1. / (gamma_t + 1.))
        upper_bound = final_lr * (1. + 1. / (gamma_t))

        # m_t = beta1 * m + (1 - beta1) * g_t
        m = self.get_slot(var, "m")
        m_scaled_g_values = grad * (1 - beta1_t)
        m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_t]):
            m_t = scatter_add(m, indices, m_scaled_g_values)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        v = self.get_slot(var, "v")
        v_scaled_g_values = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
        with ops.control_dependencies([v_t]):
            v_t = scatter_add(v, indices, v_scaled_g_values)

        # amsgrad
        vhat = self.get_slot(var, "vhat")
        if self._amsbound:
            vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
            v_sqrt = math_ops.sqrt(vhat_t)
        else:
            vhat_t = state_ops.assign(vhat, vhat)
            v_sqrt = math_ops.sqrt(v_t)

        # Compute the bounds
        step_size_bound = step_size / (v_sqrt + epsilon_t)
        bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

        var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

        return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 
Example #27
Source File: main.py    From keras-onnx with MIT License 4 votes vote down vote up
def convert_keras(model, name=None, doc_string='', target_opset=None,
                  channel_first_inputs=None, debug_mode=False, custom_op_conversions=None):
    # type: (keras.Model, str, str, int, [], bool, {}) -> onnx.ModelProto
    """
    :param model: keras model
    :param name: the converted onnx model internal name
    :param doc_string: doc string
    :param target_opset: the targeted onnx model opset
    :param channel_first_inputs: A list of channel first input
    :param debug_mode: will enable the log and try to convert as much as possible on conversion
    :param custom_op_conversions: the handler for custom operator conversion
    :return an ONNX ModelProto
    """
    if isinstance(model, tf.keras.Model) and not is_tf_keras:
        raise Exception("This is a tensorflow keras model, but keras standalone converter is used." +
                        " Please set environment variable TF_KERAS = 1 before importing keras2onnx.")

    set_logger_level(logging.DEBUG if debug_mode else logging.INFO)
    if is_tf2:
        from tensorflow.python.eager import context
        k2o_logger().info("tf executing eager_mode: {}".format(context.executing_eagerly()))
        if hasattr(model, 'run_eagerly'):
            k2o_logger().info("tf.keras model eager_mode: {}".format(model.run_eagerly))
    if debug_mode:
        print(model.summary())

    name = name or model.name
    cvt_default_opset = get_maximum_opset_supported()
    if target_opset is None:
        target_opset = cvt_default_opset
    elif target_opset > cvt_default_opset:
        raise RuntimeError(
            "The opset {} conversion not support yet, the current maximum opset version supported is {}.".format(
                target_opset, cvt_default_opset))
    input_names = []
    output_names = []
    output_dict = {}
    if is_tf2 and is_tf_keras:
        tf_graph = build_layer_output_from_model(model, output_dict, input_names, output_names)
    else:
        tf_graph = model.outputs[0].graph if is_tf2 else keras.backend.get_session().graph
        output_dict = build_opdict_from_keras(model)
        output_names = [n.name for n in model.outputs]

    static_set_ke2onnx_converters(set_converter)
    register_direct_tf_ops()
    dump_graph_into_tensorboard(tf_graph)
    topology = Topology(model, tf_graph,
                        target_opset=target_opset,
                        custom_op_dict=custom_op_conversions)
    topology.debug_mode = debug_mode
    if (not model.inputs) or (not model.outputs):
        # Since Tensorflow 2.2, For the subclassed tf.keras model, there is no inputs/outputs info ...
        # ... stored in model object any more.
        parse_graph_modeless(topology, tf_graph, target_opset, input_names, output_names, output_dict)
    else:
        parse_graph(topology, tf_graph, target_opset, output_names, output_dict)
    topology.compile()

    return convert_topology(topology, name, doc_string, target_opset, channel_first_inputs) 
Example #28
Source File: test_patch_bias_add.py    From framework-determinism with Apache License 2.0 4 votes vote down vote up
def _testDeterministicGradientsCase(self, op_binding, data_layout, data_rank,
                                      data_type):
    seed = (
        hash(data_layout) % 256 + hash(data_rank) % 256 + hash(data_type) % 256)
    np.random.seed(seed)
    batch_size = 10
    channel_count = 8
    data_dim = 14
    input_shape = self._makeShapeTuple(batch_size, channel_count, data_rank,
                                       data_dim, data_layout)
    bias_shape = (channel_count,)
    output_shape = input_shape
    input_val = self._randomDataOp(input_shape, data_type)
    bias_val = self._randomDataOp(bias_shape, data_type)
    data_format = self._dataFormatFromDataLayout(data_layout)
    repeat_count = 5
    if context.executing_eagerly():

      def bias_gradients(local_seed):
        np.random.seed(local_seed)
        upstream_gradients = self._randomDataOp(output_shape, data_type)
        with backprop.GradientTape(persistent=True) as tape:
          tape.watch(bias_val)
          bias_add_output = op_binding(input_val, bias_val,
                                       data_format=data_format)
          gradient_injector_output = bias_add_output * upstream_gradients
        return tape.gradient(gradient_injector_output, bias_val)

      for i in range(repeat_count):
        local_seed = seed + i # select different upstream gradients
        result_a = bias_gradients(local_seed)
        result_b = bias_gradients(local_seed)
        self.assertAllEqual(result_a, result_b)
    else:
      upstream_gradients = array_ops.placeholder(data_type, shape=output_shape,
                                                 name='upstream_gradients')
      bias_add_output = op_binding(input_val, bias_val, data_format=data_format)
      gradient_injector_output = bias_add_output * upstream_gradients
      # The gradient function behaves as if grad_ys is multiplied by the op
      # gradient result, not passing the upstram gradients through the op's
      # gradient generation graph. This is the reason for using the
      # gradient injector
      bias_gradients = gradients_impl.gradients(
          gradient_injector_output,
          bias_val,
          grad_ys=None,
          colocate_gradients_with_ops=True)[0]
      for i in range(repeat_count):
        feed_dict = {upstream_gradients: self._randomNDArray(output_shape)}
        result_a = bias_gradients.eval(feed_dict=feed_dict)
        result_b = bias_gradients.eval(feed_dict=feed_dict)
        self.assertAllEqual(result_a, result_b) 
Example #29
Source File: test_patch_bias_add.py    From framework-determinism with Apache License 2.0 4 votes vote down vote up
def _computeGradient(self, np_input, bias, dtype, data_format):
    input_shape = output_shape = np_input.shape
    bias_shape = bias.shape
    input_tensor = constant_op.constant(
        np_input, shape=input_shape, dtype=dtype)
    bias_tensor = constant_op.constant(bias, shape=bias_shape, dtype=dtype)

    if context.executing_eagerly():

      def bias_add(input_tensor, bias_tensor):
        return nn_ops.bias_add(
            input_tensor, bias_tensor, data_format=data_format)

      # The following is a work-around for TF issue 33660. Instead of
      # calculating the analytical and numerical gradients for both
      # inputs in a single call to compute_gradient, compute_gradient
      # is called for each input separately.
      def bias_add_1(input_tensor):
        return bias_add(input_tensor, bias_tensor)

      def bias_add_2(bias_tensor):
        return bias_add(input_tensor, bias_tensor)

      input_jacob_a, input_jacob_n = gradient_checker_v2.compute_gradient(
          bias_add_1, [input_tensor])
      bias_jacob_a, bias_jacob_n = gradient_checker_v2.compute_gradient(
          bias_add_2, [bias_tensor])

      # Test gradient of BiasAddGrad
      def bias_add_grad_function(upstream_gradients):
        with backprop.GradientTape() as tape:
          tape.watch(bias_tensor)
          bias_add_output = bias_add(input_tensor, bias_tensor)
          gradient_injector_output = bias_add_output * upstream_gradients
          return tape.gradient(gradient_injector_output, bias_tensor)

      upstream_tensor = self._random_tensor(output_shape, dtype)
      grad_jacob_a, grad_jacob_n = gradient_checker_v2.compute_gradient(
          bias_add_grad_function, [upstream_tensor])
    else:
      output_tensor = nn_ops.bias_add(
          input_tensor, bias_tensor, data_format=data_format)
      jacobians = gradient_checker.compute_gradient(
          [input_tensor, bias_tensor], [input_shape, bias_shape],
          output_tensor, output_shape)
      (input_jacob_a, input_jacob_n), (bias_jacob_a, bias_jacob_n) = jacobians
      # Test gradient of BiasAddGrad
      bias_add_grad = gradients_impl.gradients(
          nn_ops.l2_loss(output_tensor), bias_tensor)[0]
      grad_jacob_a, grad_jacob_n = gradient_checker.compute_gradient(
          output_tensor, output_shape, bias_add_grad, bias_shape)

    return ((input_jacob_a, bias_jacob_a, grad_jacob_a),
            (input_jacob_n, bias_jacob_n, grad_jacob_n)) 
Example #30
Source File: patch.py    From framework-determinism with Apache License 2.0 4 votes vote down vote up
def _new_bias_add_1_14(value, bias, data_format=None, name=None):
  """Adds `bias` to `value`.

  This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D.
  Broadcasting is supported, so `value` may have any number of dimensions.
  Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the
  case where both types are quantized.

  Args:
    value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`,
      `int16`, `int8`, `complex64`, or `complex128`.
    bias: A 1-D `Tensor` with size matching the channel dimension of `value`.
      Must be the same type as `value` unless `value` is a quantized type,
      in which case a different quantized type may be used.
    data_format: A string. 'N...C' and 'NC...' are supported. If `None` (the
      default) is specified then 'N..C' is assumed.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` with the same type as `value`.

  Raises:
    ValueError if data format is unrecognized, if `value` has less than two
    dimensions when `data_format` is 'N..C'/`None` or `value` has less
    then three dimensions when `data_format` is `NC..`, if `bias` does not
    have exactly one dimension (is a vector), or if the size of `bias`
    does not match the size of the channel dimension of `value`.
  """
  with ops.name_scope(name, "BiasAdd", [value, bias]) as name:
    if data_format is not None:
      if data_format.startswith("NC"):
        data_format = "NCHW"
      elif data_format.startswith("N") and data_format.endswith("C"):
        data_format = "NHWC"
      else:
        raise ValueError("data_format must be of the form `N...C` or `NC...`")

    if not context.executing_eagerly():
      value = ops.convert_to_tensor(value, name="input")
      bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias")

    if data_format == 'NCHW':
      broadcast_shape_head = [1, array_ops.size(bias)]
      broadcast_shape_tail = array_ops.ones(array_ops.rank(value) - 2,
                                            dtype=dtypes.int32)
      broadcast_shape = array_ops.concat(
          [broadcast_shape_head, broadcast_shape_tail], 0)
      return math_ops.add(
          value, array_ops.reshape(bias, broadcast_shape), name=name)
    else: # data_format == 'NHWC' or data_format == None
      return math_ops.add(value, bias, name=name)