Python tensorflow.python.eager.context.executing_eagerly() Examples
The following are 30 code examples of tensorflow.python.eager.context.executing_eagerly().
The originating project and source file are listed above each example.
You may also want to check out all available functions and classes of the module
tensorflow.python.eager.context.
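Before the examples, here is a minimal orientation sketch (not taken from any of the projects below; the function names are illustrative) of the two patterns that recur throughout this page: choosing a graph key for non-slot optimizer state, and restoring static shape information only when running in graph mode.

from tensorflow.python.eager import context
from tensorflow.python.framework import ops


def _non_slot_graph_key():
    # Non-slot optimizer variables are keyed by graph; under eager execution
    # there is no default graph, so None is used as the key (see the optimizer
    # examples below).
    return None if context.executing_eagerly() else ops.get_default_graph()


def _restore_static_shape(outputs, static_shape):
    # Layers call set_shape() only in graph mode, where dynamic reshapes can
    # lose static shape information; eager tensors already carry concrete
    # shapes (see the layer examples below).
    if not context.executing_eagerly():
        outputs.set_shape(static_shape)
    return outputs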
Example #1
Source File: layers.py From tensornets with MIT License

def softmax(logits, scope=None):
    """Performs softmax on Nth dimension of N-dimensional logit tensor.

    For two-dimensional logits this reduces to tf.nn.softmax. The N-th dimension
    needs to have a specified number of elements (number of classes).

    Args:
      logits: N-dimensional `Tensor` with logits, where N > 1.
      scope: Optional scope for variable_scope.

    Returns:
      A `Tensor` with same shape and type as logits.
    """
    # TODO(jrru): Add axis argument which defaults to last dimension.
    with variable_scope.variable_scope(scope, 'softmax', [logits]):
        num_logits = utils.last_dimension(logits.get_shape(), min_rank=2)
        logits_2d = array_ops.reshape(logits, [-1, num_logits])
        predictions = nn.softmax(logits_2d)
        predictions = array_ops.reshape(predictions, array_ops.shape(logits))
        if not context.executing_eagerly():
            predictions.set_shape(logits.get_shape())
        return predictions
Example #2
Source File: spectral_norm_dense.py From tf2rl with MIT License

def call(self, inputs):
    w = self.compute_spectral_norm()
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    rank = common_shapes.rank(inputs)
    if rank > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, w, [[rank - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if not context.executing_eagerly():
            shape = inputs.get_shape().as_list()
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        outputs = gen_math_ops.mat_mul(inputs, w)
    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        return self.activation(outputs)  # pylint: disable=not-callable
    return outputs
Example #3
Source File: temporal_convolutional_network.py From nlp-architect with Apache License 2.0

def __init__(self, layer, data_init=False, **kwargs):
    if not isinstance(layer, Layer):
        raise ValueError(
            "Please initialize `WeightNorm` layer with a "
            "`Layer` instance. You passed: {input}".format(input=layer)
        )

    if not context.executing_eagerly() and data_init:
        raise NotImplementedError(
            "Data dependent variable initialization is not available for "
            "graph execution"
        )

    self.initialized = True
    if data_init:
        self.initialized = False

    self.layer_depth = None
    self.norm_axes = None
    super(WeightNorm, self).__init__(layer, **kwargs)
    self._track_trackable(layer, name="layer")
Example #4
Source File: pad_along_dimension_op_test.py From text with Apache License 2.0

def testRaggedPadDimensionErrors(self):
    ragged_data = ragged_factory_ops.constant([[1, 2], [3, 4]])
    self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        'axis must be between -k <= axis <= -1 OR 0 <= axis < k',
        pad_along_dimension_op.pad_along_dimension,
        ragged_data,
        left_pad=[0],
        axis=2)
    self.assertRaisesRegexp(
        ValueError,
        r'Shapes .* are incompatible',
        pad_along_dimension_op.pad_along_dimension,
        ragged_data,
        axis=1,
        left_pad=ragged_data)
    if not context.executing_eagerly():
        self.assertRaisesRegexp(
            ValueError,
            'axis may not be negative if data is ragged '
            'and data.ndims is not statically known.',
            pad_along_dimension_op.pad_along_dimension,
            ragged_tensor.RaggedTensor.from_tensor(
                array_ops.placeholder_with_default([[1, 2], [3, 4]], shape=None)),
            left_pad=[0],
            axis=-1)
Example #5
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0

def _finish(self, update_ops, name_scope):
    # Update the power accumulators.
    with ops.control_dependencies(update_ops):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
        beta1_power = self._get_non_slot_variable("beta1_power", graph=graph)
        beta2_power = self._get_non_slot_variable("beta2_power", graph=graph)
        gamma_multi = self._get_non_slot_variable("gamma_multi", graph=graph)
        with ops.colocate_with(beta1_power):
            update_beta1 = beta1_power.assign(
                beta1_power * self._beta1_t, use_locking=self._use_locking)
            update_beta2 = beta2_power.assign(
                beta2_power * self._beta2_t, use_locking=self._use_locking)
            update_gamma = gamma_multi.assign(
                gamma_multi + self._gamma_t, use_locking=self._use_locking)
    return control_flow_ops.group(
        *update_ops + [update_beta1, update_beta2, update_gamma],
        name=name_scope)
Example #6
Source File: AdaBound.py From captcha_trainer with Apache License 2.0

def _create_slots(self, var_list):
    first_var = min(var_list, key=lambda x: x.name)

    if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
    else:
        graph = ops.get_default_graph()

    create_new = self._get_non_slot_variable("beta1_power", graph) is None
    if not create_new and context.in_graph_mode():
        create_new = (self._get_non_slot_variable("beta1_power", graph).graph
                      is not first_var.graph)

    if create_new:
        self._create_non_slot_variable(initial_value=self._beta1,
                                       name="beta1_power",
                                       colocate_with=first_var)
        self._create_non_slot_variable(initial_value=self._beta2,
                                       name="beta2_power",
                                       colocate_with=first_var)
        self._create_non_slot_variable(initial_value=self._gamma,
                                       name="gamma_multi",
                                       colocate_with=first_var)

    # Create slots for the first and second moments.
    for v in var_list:
        self._zeros_slot(v, "m", self._name)
        self._zeros_slot(v, "v", self._name)
        self._zeros_slot(v, "vhat", self._name)
Example #7
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0

def _create_slots(self, var_list):
    first_var = min(var_list, key=lambda x: x.name)

    graph = None if context.executing_eagerly() else ops.get_default_graph()
    create_new = self._get_non_slot_variable("beta1_power", graph) is None
    if not create_new and context.in_graph_mode():
        create_new = (self._get_non_slot_variable("beta1_power", graph).graph
                      is not first_var.graph)

    if create_new:
        self._create_non_slot_variable(initial_value=self._beta1,
                                       name="beta1_power",
                                       colocate_with=first_var)
        self._create_non_slot_variable(initial_value=self._beta2,
                                       name="beta2_power",
                                       colocate_with=first_var)
        self._create_non_slot_variable(initial_value=self._gamma,
                                       name="gamma_multi",
                                       colocate_with=first_var)

    # Create slots for the first and second moments.
    for v in var_list:
        self._zeros_slot(v, "m", self._name)
        self._zeros_slot(v, "v", self._name)
        self._zeros_slot(v, "vhat", self._name)
Example #8
Source File: AdaBound.py From HyperGAN with MIT License

def _create_slots(self, var_list):
    first_var = min(var_list, key=lambda x: x.name)
    graph = None if context.executing_eagerly() else ops.get_default_graph()

    # Create slots for the first and second moments.
    for v in var_list:
        self._zeros_slot(v, "m", self._name)
        self._zeros_slot(v, "v", self._name)
        self._zeros_slot(v, "vhat", self._name)
Example #9
Source File: temporal_convolutional_network.py From nlp-architect with Apache License 2.0

def call(self, inputs):
    """Call `Layer`"""
    if context.executing_eagerly():
        if not self.initialized:
            self._data_dep_init(inputs)
        self._compute_weights()  # Recompute weights for each forward pass

    output = self.layer.call(inputs)
    return output
Example #10
Source File: training.py From keras-radam with MIT License

def _get_beta_accumulators(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return (self._get_non_slot_variable("step", graph=graph),
                self._get_non_slot_variable("beta1_power", graph=graph),
                self._get_non_slot_variable("beta2_power", graph=graph))
Example #11
Source File: RAdam.py From RAdam-Tensorflow with MIT License

def _get_beta_accumulators(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return (self._get_non_slot_variable("step", graph=graph),
                self._get_non_slot_variable("beta1_power", graph=graph),
                self._get_non_slot_variable("beta2_power", graph=graph))
Example #12
Source File: lamb_optimizer_v1.py From training with Apache License 2.0

def _get_beta_accumulators(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return (self._get_non_slot_variable("beta1_power", graph=graph),
                self._get_non_slot_variable("beta2_power", graph=graph))
Example #13
Source File: lamb_optimizer_v1.py From training with Apache License 2.0

def _get_beta_accumulators(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return (self._get_non_slot_variable("beta1_power", graph=graph),
                self._get_non_slot_variable("beta2_power", graph=graph))
Example #14
Source File: lookahead_tensorflow.py From lookahead with MIT License

def _get_la_step_accumulators(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return self._get_non_slot_variable("la_step", graph=graph)
Example #15
Source File: gdn.py From pcc_geo_cnn with MIT License

def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    ndim = self._input_rank

    if self.rectify:
        inputs = nn.relu(inputs)

    # Compute normalization pool.
    if ndim == 2:
        norm_pool = math_ops.matmul(math_ops.square(inputs), self.gamma)
        norm_pool = nn.bias_add(norm_pool, self.beta)
    elif self.data_format == "channels_last" and ndim <= 5:
        shape = self.gamma.shape.as_list()
        gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape)
        norm_pool = nn.convolution(math_ops.square(inputs), gamma, "VALID")
        norm_pool = nn.bias_add(norm_pool, self.beta)
    else:  # generic implementation
        # This puts channels in the last dimension regardless of input.
        norm_pool = math_ops.tensordot(
            math_ops.square(inputs), self.gamma, [[self._channel_axis()], [0]])
        norm_pool += self.beta
        if self.data_format == "channels_first":
            # Return to channels_first format if necessary.
            axes = list(range(ndim - 1))
            axes.insert(1, ndim - 1)
            norm_pool = array_ops.transpose(norm_pool, axes)

    if self.inverse:
        norm_pool = math_ops.sqrt(norm_pool)
    else:
        norm_pool = math_ops.rsqrt(norm_pool)
    outputs = inputs * norm_pool

    if not context.executing_eagerly():
        outputs.set_shape(self.compute_output_shape(inputs.shape))
    return outputs
Example #16
Source File: taware_layer.py From THRED with MIT License

def __op(self, kernel, inputs, shape):
    if len(shape) > 2:
        # Broadcasting is required for the inputs.
        outputs = tf.tensordot(inputs, kernel, [[len(shape) - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        # if context.in_graph_mode():  # for tf > 1.5.0
        if not context.executing_eagerly():
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        outputs = tf.matmul(inputs, kernel)

    return outputs
Example #17
Source File: RAdam.py From captcha_trainer with Apache License 2.0

def _get_beta_accumulators(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return (self._get_non_slot_variable("step", graph=graph),
                self._get_non_slot_variable("beta1_power", graph=graph),
                self._get_non_slot_variable("beta2_power", graph=graph))
Example #18
Source File: span_overlaps_op_test.py From text with Apache License 2.0

def testErrors(self):
    t = [10, 20, 30, 40, 50]
    with self.assertRaisesRegexp(TypeError, 'contains must be bool.'):
        pointer_ops.span_overlaps(t, t, t, t, contains='x')
    with self.assertRaisesRegexp(TypeError, 'contained_by must be bool.'):
        pointer_ops.span_overlaps(t, t, t, t, contained_by='x')
    with self.assertRaisesRegexp(TypeError, 'partial_overlap must be bool.'):
        pointer_ops.span_overlaps(t, t, t, t, partial_overlap='x')
    with self.assertRaisesRegexp(
            TypeError, 'source_start, source_limit, target_start, and '
            'target_limit must all have the same dtype'):
        pointer_ops.span_overlaps(t, t, t, [1.0, 2.0, 3.0, 4.0, 5.0])
    with self.assertRaisesRegexp(
            ValueError, r'Shapes \(5,\) and \(4,\) are incompatible'):
        pointer_ops.span_overlaps(t, t[:4], t, t)
    with self.assertRaisesRegexp(
            ValueError, r'Shapes \(4,\) and \(5,\) are incompatible'):
        pointer_ops.span_overlaps(t, t, t[:4], t)
    with self.assertRaisesRegexp(
            ValueError, r'Shapes \(1, 5\) and \(5,\) must have the same rank'):
        pointer_ops.span_overlaps([t], [t], t, t)
    if not context.executing_eagerly():
        with self.assertRaisesRegexp(
                ValueError, 'For ragged inputs, the shape.ndims of at least one '
                'span tensor must be statically known.'):
            x = ragged_tensor.RaggedTensor.from_row_splits(
                array_ops.placeholder(dtypes.int32), [0, 3, 8])
            pointer_ops.span_overlaps(x, x, x, x)
    with self.assertRaisesRegexp(
            ValueError, 'Span tensors must all have the same ragged_rank'):
        a = [[10, 20, 30], [40, 50, 60]]
        pointer_ops.span_overlaps(a, a, a, ragged_factory_ops.constant(a))
    with self.assertRaisesRegexp(
            errors.InvalidArgumentError,
            'Mismatched ragged shapes for batch dimensions'):
        rt1 = ragged_factory_ops.constant([[[1, 2], [3]], [[4, 5]]])
        rt2 = ragged_factory_ops.constant([[[1, 2], [3]], [[4, 5], [6]]])
        pointer_ops.span_overlaps(rt1, rt1, rt2, rt2)
Example #19
Source File: qhadam.py From qhoptim with MIT License

def _get_beta_weights(self):
    with ops.init_scope():
        if context.executing_eagerly():
            graph = None
        else:
            graph = ops.get_default_graph()
        return (
            self._get_non_slot_variable("beta1_weight", graph=graph),
            self._get_non_slot_variable("beta2_weight", graph=graph),
        )
Example #20
Source File: AdaBound.py From captcha_trainer with Apache License 2.0

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
    else:
        graph = ops.get_default_graph()
    beta1_power = math_ops.cast(
        self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
    beta2_power = math_ops.cast(
        self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_t + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_t))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #21
Source File: AdaBound.py From captcha_trainer with Apache License 2.0

def _resource_apply_dense(self, grad, var):
    if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
    else:
        graph = ops.get_default_graph()
    beta1_power = math_ops.cast(
        self._get_non_slot_variable("beta1_power", graph=graph), grad.dtype.base_dtype)
    beta2_power = math_ops.cast(
        self._get_non_slot_variable("beta2_power", graph=graph), grad.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
    gamma_multi = math_ops.cast(
        self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #22
Source File: entropy_models.py From pcc_geo_cnn with MIT License

def compress(self, inputs):
    """Compress inputs and store their binary representations into strings.

    Args:
      inputs: `Tensor` with values to be compressed.

    Returns:
      String `Tensor` vector containing the compressed representation of each
      batch element of `inputs`.
    """
    with ops.name_scope(self._name_scope()):
        inputs = ops.convert_to_tensor(inputs)
        if not self.built:
            # Check input assumptions set before layer building, e.g. input rank.
            input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
            if self.dtype is None:
                self._dtype = inputs.dtype.base_dtype.name
            self.build(inputs.shape)

        # Check input assumptions set after layer building, e.g. input shape.
        if not context.executing_eagerly():
            input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)

        ndim = self.input_spec.ndim
        channel_axis = self._channel_axis(ndim)
        # Tuple of slices for expanding dimensions of tensors below.
        slices = ndim * [None] + [slice(None)]
        slices[channel_axis] = slice(None)
        slices = tuple(slices)

        # Expand dimensions of CDF to input dimensions, keeping the channels along
        # the right dimension.
        cdf = self._quantized_cdf[slices[1:]]
        num_levels = array_ops.shape(cdf)[-1] - 1

        # Bring inputs to the right range by centering the range on the medians.
        half = constant_op.constant(.5, dtype=self.dtype)
        medians = array_ops.squeeze(self._medians, [1, 2])
        offsets = (math_ops.cast(num_levels // 2, self.dtype) + half) - medians
        # Expand offsets to input dimensions and add to inputs.
        values = inputs + offsets[slices[:-1]]

        # Clip to range and cast to integers. Because we have added .5 above, and
        # all values are positive, the cast effectively implements rounding.
        values = math_ops.maximum(values, half)
        values = math_ops.minimum(
            values, math_ops.cast(num_levels, self.dtype) - half)
        values = math_ops.cast(values, dtypes.int16)

        def loop_body(tensor):
            return coder_ops.range_encode(
                tensor, cdf, precision=self.range_coder_precision)

        strings = functional_ops.map_fn(
            loop_body, values, dtype=dtypes.string, back_prop=False)

        if not context.executing_eagerly():
            strings.set_shape(inputs.shape[:1])

        return strings
Example #23
Source File: AdaBound.py From captcha_trainer with Apache License 2.0

def _apply_dense(self, grad, var):
    if StrictVersion(tf.__version__) >= StrictVersion('1.10.0'):
        graph = None if context.executing_eagerly() else ops.get_default_graph()
    else:
        graph = ops.get_default_graph()
    beta1_power = math_ops.cast(
        self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
    beta2_power = math_ops.cast(
        self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_multi = math_ops.cast(
        self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #24
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0

def _apply_dense(self, grad, var):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    beta1_power = math_ops.cast(
        self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
    beta2_power = math_ops.cast(
        self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_multi = math_ops.cast(
        self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #25
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0

def _resource_apply_dense(self, grad, var):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    beta1_power = math_ops.cast(
        self._get_non_slot_variable("beta1_power", graph=graph), grad.dtype.base_dtype)
    beta2_power = math_ops.cast(
        self._get_non_slot_variable("beta2_power", graph=graph), grad.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, grad.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype)
    gamma_multi = math_ops.cast(
        self._get_non_slot_variable("gamma_multi", graph=graph), var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_multi + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_multi))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                           use_locking=self._use_locking)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                           use_locking=self._use_locking)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #26
Source File: AdaBound.py From AdaBound-Tensorflow with Apache License 2.0

def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    graph = None if context.executing_eagerly() else ops.get_default_graph()
    beta1_power = math_ops.cast(
        self._get_non_slot_variable("beta1_power", graph=graph), var.dtype.base_dtype)
    beta2_power = math_ops.cast(
        self._get_non_slot_variable("beta2_power", graph=graph), var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    base_lr_t = math_ops.cast(self._base_lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    gamma_t = math_ops.cast(self._gamma_t, var.dtype.base_dtype)

    step_size = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    final_lr = self._final_lr * lr_t / base_lr_t
    lower_bound = final_lr * (1. - 1. / (gamma_t + 1.))
    upper_bound = final_lr * (1. + 1. / (gamma_t))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
        m_t = scatter_add(m, indices, m_scaled_g_values)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
        v_t = scatter_add(v, indices, v_scaled_g_values)

    # amsgrad
    vhat = self.get_slot(var, "vhat")
    if self._amsbound:
        vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
        v_sqrt = math_ops.sqrt(vhat_t)
    else:
        vhat_t = state_ops.assign(vhat, vhat)
        v_sqrt = math_ops.sqrt(v_t)

    # Compute the bounds
    step_size_bound = step_size / (v_sqrt + epsilon_t)
    bounded_lr = m_t * clip_by_value(step_size_bound, lower_bound, upper_bound)

    var_update = state_ops.assign_sub(var, bounded_lr, use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
Example #27
Source File: main.py From keras-onnx with MIT License

def convert_keras(model, name=None, doc_string='', target_opset=None,
                  channel_first_inputs=None, debug_mode=False,
                  custom_op_conversions=None):
    # type: (keras.Model, str, str, int, [], bool, {}) -> onnx.ModelProto
    """
    :param model: keras model
    :param name: the converted onnx model internal name
    :param doc_string: doc string
    :param target_opset: the targeted onnx model opset
    :param channel_first_inputs: A list of channel first input
    :param debug_mode: will enable the log and try to convert as much as possible on conversion
    :param custom_op_conversions: the handler for custom operator conversion
    :return an ONNX ModelProto
    """
    if isinstance(model, tf.keras.Model) and not is_tf_keras:
        raise Exception("This is a tensorflow keras model, but keras standalone converter is used." +
                        " Please set environment variable TF_KERAS = 1 before importing keras2onnx.")
    set_logger_level(logging.DEBUG if debug_mode else logging.INFO)
    if is_tf2:
        from tensorflow.python.eager import context
        k2o_logger().info("tf executing eager_mode: {}".format(context.executing_eagerly()))
        if hasattr(model, 'run_eagerly'):
            k2o_logger().info("tf.keras model eager_mode: {}".format(model.run_eagerly))
    if debug_mode:
        print(model.summary())

    name = name or model.name
    cvt_default_opset = get_maximum_opset_supported()
    if target_opset is None:
        target_opset = cvt_default_opset
    elif target_opset > cvt_default_opset:
        raise RuntimeError(
            "The opset {} conversion not support yet, the current maximum opset version supported is {}.".format(
                target_opset, cvt_default_opset))

    input_names = []
    output_names = []
    output_dict = {}
    if is_tf2 and is_tf_keras:
        tf_graph = build_layer_output_from_model(model, output_dict, input_names, output_names)
    else:
        tf_graph = model.outputs[0].graph if is_tf2 else keras.backend.get_session().graph
        output_dict = build_opdict_from_keras(model)
        output_names = [n.name for n in model.outputs]

    static_set_ke2onnx_converters(set_converter)
    register_direct_tf_ops()
    dump_graph_into_tensorboard(tf_graph)
    topology = Topology(model, tf_graph, target_opset=target_opset,
                        custom_op_dict=custom_op_conversions)
    topology.debug_mode = debug_mode
    if (not model.inputs) or (not model.outputs):
        # Since Tensorflow 2.2, For the subclassed tf.keras model, there is no inputs/outputs info ...
        # ... stored in model object any more.
        parse_graph_modeless(topology, tf_graph, target_opset, input_names, output_names, output_dict)
    else:
        parse_graph(topology, tf_graph, target_opset, output_names, output_dict)
    topology.compile()

    return convert_topology(topology, name, doc_string, target_opset, channel_first_inputs)
Example #28
Source File: test_patch_bias_add.py From framework-determinism with Apache License 2.0

def _testDeterministicGradientsCase(self, op_binding, data_layout, data_rank,
                                    data_type):
    seed = (
        hash(data_layout) % 256 + hash(data_rank) % 256 + hash(data_type) % 256)
    np.random.seed(seed)
    batch_size = 10
    channel_count = 8
    data_dim = 14
    input_shape = self._makeShapeTuple(batch_size, channel_count, data_rank,
                                       data_dim, data_layout)
    bias_shape = (channel_count,)
    output_shape = input_shape
    input_val = self._randomDataOp(input_shape, data_type)
    bias_val = self._randomDataOp(bias_shape, data_type)
    data_format = self._dataFormatFromDataLayout(data_layout)
    repeat_count = 5
    if context.executing_eagerly():

        def bias_gradients(local_seed):
            np.random.seed(local_seed)
            upstream_gradients = self._randomDataOp(output_shape, data_type)
            with backprop.GradientTape(persistent=True) as tape:
                tape.watch(bias_val)
                bias_add_output = op_binding(input_val, bias_val,
                                             data_format=data_format)
                gradient_injector_output = bias_add_output * upstream_gradients
            return tape.gradient(gradient_injector_output, bias_val)

        for i in range(repeat_count):
            local_seed = seed + i  # select different upstream gradients
            result_a = bias_gradients(local_seed)
            result_b = bias_gradients(local_seed)
            self.assertAllEqual(result_a, result_b)
    else:
        upstream_gradients = array_ops.placeholder(data_type, shape=output_shape,
                                                   name='upstream_gradients')
        bias_add_output = op_binding(input_val, bias_val, data_format=data_format)
        gradient_injector_output = bias_add_output * upstream_gradients
        # The gradient function behaves as if grad_ys is multiplied by the op
        # gradient result, not passing the upstream gradients through the op's
        # gradient generation graph. This is the reason for using the
        # gradient injector
        bias_gradients = gradients_impl.gradients(
            gradient_injector_output,
            bias_val,
            grad_ys=None,
            colocate_gradients_with_ops=True)[0]
        for i in range(repeat_count):
            feed_dict = {upstream_gradients: self._randomNDArray(output_shape)}
            result_a = bias_gradients.eval(feed_dict=feed_dict)
            result_b = bias_gradients.eval(feed_dict=feed_dict)
            self.assertAllEqual(result_a, result_b)
Example #29
Source File: test_patch_bias_add.py From framework-determinism with Apache License 2.0

def _computeGradient(self, np_input, bias, dtype, data_format):
    input_shape = output_shape = np_input.shape
    bias_shape = bias.shape
    input_tensor = constant_op.constant(
        np_input, shape=input_shape, dtype=dtype)
    bias_tensor = constant_op.constant(bias, shape=bias_shape, dtype=dtype)

    if context.executing_eagerly():

        def bias_add(input_tensor, bias_tensor):
            return nn_ops.bias_add(
                input_tensor, bias_tensor, data_format=data_format)

        # The following is a work-around for TF issue 33660. Instead of
        # calculating the analytical and numerical gradients for both
        # inputs in a single call to compute_gradient, compute_gradient
        # is called for each input separately.
        def bias_add_1(input_tensor):
            return bias_add(input_tensor, bias_tensor)

        def bias_add_2(bias_tensor):
            return bias_add(input_tensor, bias_tensor)

        input_jacob_a, input_jacob_n = gradient_checker_v2.compute_gradient(
            bias_add_1, [input_tensor])
        bias_jacob_a, bias_jacob_n = gradient_checker_v2.compute_gradient(
            bias_add_2, [bias_tensor])

        # Test gradient of BiasAddGrad
        def bias_add_grad_function(upstream_gradients):
            with backprop.GradientTape() as tape:
                tape.watch(bias_tensor)
                bias_add_output = bias_add(input_tensor, bias_tensor)
                gradient_injector_output = bias_add_output * upstream_gradients
                return tape.gradient(gradient_injector_output, bias_tensor)

        upstream_tensor = self._random_tensor(output_shape, dtype)
        grad_jacob_a, grad_jacob_n = gradient_checker_v2.compute_gradient(
            bias_add_grad_function, [upstream_tensor])
    else:
        output_tensor = nn_ops.bias_add(
            input_tensor, bias_tensor, data_format=data_format)
        jacobians = gradient_checker.compute_gradient(
            [input_tensor, bias_tensor], [input_shape, bias_shape],
            output_tensor, output_shape)
        (input_jacob_a, input_jacob_n), (bias_jacob_a, bias_jacob_n) = jacobians

        # Test gradient of BiasAddGrad
        bias_add_grad = gradients_impl.gradients(
            nn_ops.l2_loss(output_tensor), bias_tensor)[0]
        grad_jacob_a, grad_jacob_n = gradient_checker.compute_gradient(
            output_tensor, output_shape, bias_add_grad, bias_shape)

    return ((input_jacob_a, bias_jacob_a, grad_jacob_a),
            (input_jacob_n, bias_jacob_n, grad_jacob_n))
Example #30
Source File: patch.py From framework-determinism with Apache License 2.0

def _new_bias_add_1_14(value, bias, data_format=None, name=None):
    """Adds `bias` to `value`.

    This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D.
    Broadcasting is supported, so `value` may have any number of dimensions.
    Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the
    case where both types are quantized.

    Args:
      value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`,
        `int16`, `int8`, `complex64`, or `complex128`.
      bias: A 1-D `Tensor` with size matching the channel dimension of `value`.
        Must be the same type as `value` unless `value` is a quantized type,
        in which case a different quantized type may be used.
      data_format: A string. 'N...C' and 'NC...' are supported. If `None` (the
        default) is specified then 'N..C' is assumed.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` with the same type as `value`.

    Raises:
      ValueError if data format is unrecognized, if `value` has less than two
      dimensions when `data_format` is 'N..C'/`None` or `value` has less than
      three dimensions when `data_format` is `NC..`, if `bias` does not have
      exactly one dimension (is a vector), or if the size of `bias` does not
      match the size of the channel dimension of `value`.
    """
    with ops.name_scope(name, "BiasAdd", [value, bias]) as name:
        if data_format is not None:
            if data_format.startswith("NC"):
                data_format = "NCHW"
            elif data_format.startswith("N") and data_format.endswith("C"):
                data_format = "NHWC"
            else:
                raise ValueError("data_format must be of the form `N...C` or `NC...`")

        if not context.executing_eagerly():
            value = ops.convert_to_tensor(value, name="input")
            bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias")

        if data_format == 'NCHW':
            broadcast_shape_head = [1, array_ops.size(bias)]
            broadcast_shape_tail = array_ops.ones(array_ops.rank(value) - 2,
                                                  dtype=dtypes.int32)
            broadcast_shape = array_ops.concat(
                [broadcast_shape_head, broadcast_shape_tail], 0)
            return math_ops.add(
                value, array_ops.reshape(bias, broadcast_shape), name=name)
        else:  # data_format == 'NHWC' or data_format == None
            return math_ops.add(value, bias, name=name)
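As a closing, self-contained illustration (not taken from any of the projects above), the same check is exposed publicly as tf.executing_eagerly() and returns False whenever code runs in a graph context, for example while a tf.function is being traced; this is the distinction every example on this page branches on.

import tensorflow as tf
from tensorflow.python.eager import context

print(context.executing_eagerly())  # True under TF2's default eager execution


@tf.function
def in_graph_context():
    # During tracing, code runs inside a FuncGraph, so the check is False.
    return context.executing_eagerly()


print(in_graph_context())  # a constant False tensor: tracing ran in graph mode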