Python tensorflow.compat.v1.tanh() Examples
The following are 30 code examples of tensorflow.compat.v1.tanh(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1.
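Before the project examples, here is a minimal, self-contained sketch of calling tf.compat.v1.tanh() in TF1-style graph mode. The tensor name, shape, and input values are illustrative only and are not taken from any of the projects below.

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # run with TF1-style graph execution

x = tf.placeholder(tf.float32, shape=(None, 3), name="x")
y = tf.tanh(x)  # element-wise hyperbolic tangent, output values in (-1, 1)

with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: np.array([[-2.0, 0.0, 2.0]], np.float32)}))
    # -> approximately [[-0.964  0.     0.964]]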
Example #1
Source File: test_forward.py From incubator-tvm with Apache License 2.0 | 6 votes |
def test_forward_unary():
    def _test_forward_unary(op, a_min=1, a_max=5, dtype=np.float32):
        """test unary operators"""
        np_data = np.random.uniform(a_min, a_max, size=(2, 3, 5)).astype(dtype)
        tf.reset_default_graph()
        with tf.Graph().as_default():
            in_data = tf.placeholder(dtype, (2, 3, 5), name="in_data")
            out = op(in_data)
            compare_tf_with_tvm([np_data], ['in_data:0'], out.name)

    _test_forward_unary(tf.acos, -1, 1)
    _test_forward_unary(tf.asin, -1, 1)
    _test_forward_unary(tf.atanh, -1, 1)
    _test_forward_unary(tf.sinh)
    _test_forward_unary(tf.cosh)
    _test_forward_unary(tf.acosh)
    _test_forward_unary(tf.asinh)
    _test_forward_unary(tf.atan)
    _test_forward_unary(tf.sin)
    _test_forward_unary(tf.cos)
    _test_forward_unary(tf.tan)
    _test_forward_unary(tf.tanh)
    _test_forward_unary(tf.erf)
    _test_forward_unary(tf.log)
    _test_forward_unary(tf.log1p)
Example #2
Source File: discretization.py From tensor2tensor with Apache License 2.0 | 6 votes |
def tanh_discrete_bottleneck(x, bottleneck_bits, bottleneck_noise,
                             discretize_warmup_steps, mode):
    """Simple discretization through tanh, flip bottleneck_noise many bits."""
    x = tf.layers.dense(x, bottleneck_bits, name="tanh_discrete_bottleneck")
    d0 = tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x))) - 1.0
    if mode == tf.estimator.ModeKeys.TRAIN:
        x += tf.truncated_normal(
            common_layers.shape_list(x), mean=0.0, stddev=0.2)
    x = tf.tanh(x)
    d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
    if mode == tf.estimator.ModeKeys.TRAIN:
        noise = tf.random_uniform(common_layers.shape_list(x))
        noise = 2.0 * tf.to_float(tf.less(bottleneck_noise, noise)) - 1.0
        d *= noise
    d = common_layers.mix(d, x, discretize_warmup_steps,
                          mode == tf.estimator.ModeKeys.TRAIN)
    return d, d0
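The line `d = x + tf.stop_gradient(...)` above is the usual straight-through trick: the forward pass sees the binarized sign of x, while gradients flow through tf.tanh(x) as if no discretization happened. A minimal sketch of just that idea, in isolation (the function name is mine, not from tensor2tensor):

import tensorflow.compat.v1 as tf

def straight_through_sign(x):
    # Forward pass: hard -1/+1 values.  Backward pass: identity gradient,
    # because the stop_gradient term contributes zero gradient.
    hard = 2.0 * tf.to_float(tf.less(0.0, x)) - 1.0
    return x + tf.stop_gradient(hard - x)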
Example #3
Source File: network_functions.py From magenta with Apache License 2.0 | 6 votes |
def generator_fn_specgram(inputs, **kwargs):
    """Builds generator network."""
    # inputs = (noises, one_hot_labels)
    with tf.variable_scope('generator_cond'):
        z = tf.concat(inputs, axis=1)
        if kwargs['to_rgb_activation'] == 'tanh':
            to_rgb_activation = tf.tanh
        elif kwargs['to_rgb_activation'] == 'linear':
            to_rgb_activation = lambda x: x
        fake_images, end_points = networks.generator(
            z,
            kwargs['progress'],
            lambda block_id: _num_filters_fn(block_id, **kwargs),
            kwargs['resolution_schedule'],
            num_blocks=kwargs['num_blocks'],
            kernel_size=kwargs['kernel_size'],
            colors=2,
            to_rgb_activation=to_rgb_activation,
            simple_arch=kwargs['simple_arch'])
    shape = fake_images.shape
    normalizer = data_normalizer.registry[kwargs['data_normalizer']](kwargs)
    fake_images = normalizer.denormalize_op(fake_images)
    fake_images.set_shape(shape)
    return fake_images, end_points
Example #4
Source File: util.py From nni with MIT License | 6 votes |
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """lstm cell"""
    _, nin = [v.value for v in xs[0].get_shape()]  # the first is nbatch
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
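For reference, the loop body above is the standard LSTM update, with tanh used both for the candidate input and for squashing the cell state; the `m` inputs act as reset masks (multiplying by 1-m zeroes the state between episodes in this RL code):

$$z_t = x_t W_x + h_{t-1} W_h + b, \qquad [i, f, o, u] = \operatorname{split}(z_t)$$
$$c_t = \sigma(f)\odot c_{t-1} + \sigma(i)\odot\tanh(u), \qquad h_t = \sigma(o)\odot\tanh(c_t)$$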
Example #5
Source File: run_recurrent_model_boolq.py From language with Apache License 2.0 | 6 votes |
def apply_highway_lstm(x, seq_len):
    """Run a bi-directional LSTM with highway connections over `x`.

    Args:
      x: <tf.float32>[batch, seq_len, dim]
      seq_len: <tf.int32>[batch] or None, sequence lengths of `seq2`

    Returns:
      out, <tf.float32>[batch, seq_len, out_dim]
    """
    lstm_out = apply_lstm(x, seq_len)
    proj = ops.affine(x, FLAGS.lstm_dim * 4, "w", bias_name="b")
    gate, transform = tf.split(proj, 2, 2)
    gate = tf.sigmoid(gate)
    transform = tf.tanh(transform)
    return lstm_out * gate + (1 - gate) * transform
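The last three lines form a highway connection: a sigmoid gate decides, per dimension, how much of the LSTM output to keep versus a tanh-transformed projection of the input:

$$\text{out} = g \odot \text{lstm\_out} + (1 - g)\odot \tanh(W_t x), \qquad g = \sigma(W_g x)$$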
Example #6
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def conv_lstm(x,
              kernel_size,
              filters,
              padding="SAME",
              dilation_rate=(1, 1),
              name=None,
              reuse=None):
    """Convolutional LSTM in 1 dimension."""
    with tf.variable_scope(
        name, default_name="conv_lstm", values=[x], reuse=reuse):
        gates = conv(
            x,
            4 * filters,
            kernel_size,
            padding=padding,
            dilation_rate=dilation_rate)
        g = tf.split(layer_norm(gates, 4 * filters), 4, axis=3)
        new_cell = tf.sigmoid(g[0]) * x + tf.sigmoid(g[1]) * tf.tanh(g[3])
        return tf.sigmoid(g[2]) * tf.tanh(new_cell)
Example #7
Source File: basic_stochastic.py From tensor2tensor with Apache License 2.0 | 6 votes |
def update_internal_states_early(self, internal_states, frames):
    """Update the internal states early in the network in GRU-like way."""
    batch_size = common_layers.shape_list(frames[0])[0]
    internal_state = internal_states[0][0][:batch_size, :, :, :]
    state_activation = tf.concat([internal_state, frames[0]], axis=-1)
    state_gate_candidate = tf.layers.conv2d(
        state_activation, 2 * self.hparams.recurrent_state_size,
        (3, 3), padding="SAME", name="state_conv")
    state_gate, state_candidate = tf.split(state_gate_candidate, 2, axis=-1)
    state_gate = tf.nn.sigmoid(state_gate)
    state_candidate = tf.tanh(state_candidate)
    internal_state = internal_state * state_gate
    internal_state += state_candidate * (1.0 - state_gate)
    max_batch_size = max(_MAX_BATCH, self.hparams.batch_size)
    diff_batch_size = max_batch_size - batch_size
    internal_state = tf.pad(
        internal_state, [[0, diff_batch_size], [0, 0], [0, 0], [0, 0]])
    return [[internal_state]]
Example #8
Source File: actor.py From Hierarchical-Actor-Critc-HAC- with MIT License | 6 votes |
def create_nn(self, features, name=None):
    if name is None:
        name = self.actor_name

    with tf.variable_scope(name + '_fc_1'):
        fc1 = layer(features, 64)
    with tf.variable_scope(name + '_fc_2'):
        fc2 = layer(fc1, 64)
    with tf.variable_scope(name + '_fc_3'):
        fc3 = layer(fc2, 64)
    with tf.variable_scope(name + '_fc_4'):
        fc4 = layer(fc3, self.action_space_size, is_output=True)

    output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
    return output
Example #9
Source File: actor.py From Hierarchical-Actor-Critc-HAC- with MIT License | 6 votes |
def create_nn(self, features, name=None):
    if name is None:
        name = self.actor_name

    with tf.variable_scope(name + '_fc_1'):
        fc1 = layer(features, 64)
    with tf.variable_scope(name + '_fc_2'):
        fc2 = layer(fc1, 64)
    with tf.variable_scope(name + '_fc_3'):
        fc3 = layer(fc2, 64)
    with tf.variable_scope(name + '_fc_4'):
        fc4 = layer(fc3, self.action_space_size, is_output=True)

    output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
    return output
Example #10
Source File: actor.py From Hierarchical-Actor-Critc-HAC- with MIT License | 6 votes |
def create_nn(self, features, name=None):
    if name is None:
        name = self.actor_name

    with tf.variable_scope(name + '_fc_1'):
        fc1 = layer(features, 64)
    with tf.variable_scope(name + '_fc_2'):
        fc2 = layer(fc1, 64)
    with tf.variable_scope(name + '_fc_3'):
        fc3 = layer(fc2, 64)
    with tf.variable_scope(name + '_fc_4'):
        fc4 = layer(fc3, self.action_space_size, is_output=True)

    output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
    return output
Example #11
Source File: actor.py From Hierarchical-Actor-Critc-HAC- with MIT License | 6 votes |
def create_nn(self, features, name=None):
    if name is None:
        name = self.actor_name

    with tf.variable_scope(name + '_fc_1'):
        fc1 = layer(features, 64)
    with tf.variable_scope(name + '_fc_2'):
        fc2 = layer(fc1, 64)
    with tf.variable_scope(name + '_fc_3'):
        fc3 = layer(fc2, 64)
    with tf.variable_scope(name + '_fc_4'):
        fc4 = layer(fc3, self.action_space_size, is_output=True)

    output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
    return output
Example #12
Source File: actor.py From Hierarchical-Actor-Critc-HAC- with MIT License | 6 votes |
def create_nn(self, features, name=None):
    if name is None:
        name = self.actor_name

    with tf.variable_scope(name + '_fc_1'):
        fc1 = layer(features, 64)
    with tf.variable_scope(name + '_fc_2'):
        fc2 = layer(fc1, 64)
    with tf.variable_scope(name + '_fc_3'):
        fc3 = layer(fc2, 64)
    with tf.variable_scope(name + '_fc_4'):
        fc4 = layer(fc3, self.action_space_size, is_output=True)

    output = tf.tanh(fc4) * self.action_space_bounds + self.action_offset
    return output
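Examples #8 through #12 repeat the same actor head from the HAC implementation: tanh bounds the final layer to (-1, 1), which is then rescaled into the environment's action range. A standalone sketch of just that rescaling, with made-up bounds and offset values:

import numpy as np
import tensorflow.compat.v1 as tf

fc4 = tf.constant([[3.0, -0.5]])                         # raw, unbounded network output
action_space_bounds = np.array([2.0, 1.0], np.float32)   # illustrative half-widths
action_offset = np.array([0.0, 1.0], np.float32)         # illustrative range centres
action = tf.tanh(fc4) * action_space_bounds + action_offset
# Each component of `action` stays within offset +/- bounds, here [-2, 2] and [0, 2].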
Example #13
Source File: neuralnet.py From M-LOOP with MIT License | 6 votes |
def _make_net(self, reg):
    '''
    Helper method to create a new net with a specified regularisation
    coefficient.

    The net is not initialised, so you must call init() or load() on it
    before any other method.

    Args:
        reg (float): Regularisation coefficient.
    '''
    def gelu_fast(_x):
        return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))

    creator = lambda: SingleNeuralNet(
        self.num_params,
        [64]*5,
        [gelu_fast]*5,
        0.2,   # train_threshold_ratio
        16,    # batch_size
        1.,    # keep_prob
        reg,
        self.losses_list,
        learner_archive_dir=self.learner_archive_dir,
        start_datetime=self.start_datetime)
    return SampledNeuralNet(creator, 1)
Example #14
Source File: run_dual_encoder.py From language with Apache License 2.0 | 5 votes |
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 image_vector, use_one_hot_embeddings, scope):
    """Creates a model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        scope=scope)

    if FLAGS.ignore_image:
        logit = tf.layers.dense(
            model.get_pooled_output(), 1, activation=tf.tanh,
            kernel_initializer=
            modeling.create_initializer(bert_config.initializer_range))
        logit = tf.squeeze(logit, axis=1)
    else:
        logit = tf.einsum(
            "ij,ij->i",
            tf.layers.dense(
                image_vector,
                bert_config.hidden_size,
                activation=tf.tanh,
                kernel_initializer=
                modeling.create_initializer(bert_config.initializer_range)),
            model.get_pooled_output(),
            name="inner")

    return tf.stack([-logit, logit], axis=1)
Example #15
Source File: modeling.py From training with Apache License 2.0 | 5 votes |
def gelu(x):
    """Gaussian Error Linear Unit.

    This is a smoother version of the RELU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
      x: float Tensor to perform activation.

    Returns:
      `x` with the GELU activation applied.
    """
    cdf = 0.5 * (1.0 + tf.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
    return x * cdf
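The `cdf` term above is the tanh approximation to the Gaussian CDF, so the whole function computes:

$$\operatorname{GELU}(x) \approx 0.5\,x\left(1 + \tanh\!\left(\sqrt{2/\pi}\,\bigl(x + 0.044715\,x^{3}\bigr)\right)\right)$$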
Example #16
Source File: modeling.py From training with Apache License 2.0 | 5 votes |
def get_activation(activation_string):
    """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

    Args:
      activation_string: String name of the activation function.

    Returns:
      A Python function corresponding to the activation function. If
      `activation_string` is None, empty, or "linear", this will return None.
      If `activation_string` is not a string, it will return
      `activation_string`.

    Raises:
      ValueError: The `activation_string` does not correspond to a known
        activation.
    """
    # We assume that anything that's not a string is already an activation
    # function, so we just return it.
    if not isinstance(activation_string, six.string_types):
        return activation_string

    if not activation_string:
        return None

    act = activation_string.lower()
    if act == "linear":
        return None
    elif act == "relu":
        return tf.nn.relu
    elif act == "gelu":
        return gelu
    elif act == "tanh":
        return tf.tanh
    else:
        raise ValueError("Unsupported activation: %s" % act)
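For illustration, a hypothetical call site for get_activation as defined above (the tensor and values are mine, not from the BERT code):

import tensorflow.compat.v1 as tf

act_fn = get_activation("tanh")          # -> tf.tanh
assert get_activation("linear") is None  # "linear" means no activation
hidden = act_fn(tf.constant([[-1.0, 0.0, 1.0]]))  # apply tanh element-wise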
Example #17
Source File: model.py From gpt2-estimator with MIT License | 5 votes |
def gelu(x):
    return 0.5*x*(1+tf.tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x, 3))))
Example #18
Source File: test_forward.py From incubator-tvm with Apache License 2.0 | 5 votes |
def test_forward_tanh():
    ishape = (1, 3, 10, 10)
    inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
    with tf.Graph().as_default():
        in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
        tf.nn.tanh(in1)
        compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Tanh:0')


#######################################################################
# Softmax
# -------
Example #19
Source File: lstm_utils.py From magenta with Apache License 2.0 | 5 votes |
def initial_cell_state_from_embedding(cell, z, name=None):
    """Computes an initial RNN `cell` state from an embedding, `z`."""
    flat_state_sizes = tf.nest.flatten(cell.state_size)
    return tf.nest.pack_sequence_as(
        cell.zero_state(batch_size=z.shape[0], dtype=tf.float32),
        tf.split(
            tf.layers.dense(
                z,
                sum(flat_state_sizes),
                activation=tf.tanh,
                kernel_initializer=tf.random_normal_initializer(stddev=0.001),
                name=name),
            flat_state_sizes,
            axis=1))
Example #20
Source File: models.py From graphics with Apache License 2.0 | 5 votes |
def _compute_sdf(self, x, translations, blend_terms, points):
    """Compute signed distances between query points and hyperplanes."""
    n_parts = tf.shape(x)[1]
    n_planes = tf.shape(x)[2]
    norm_logit = x[..., 0:self._dims - 1]
    offset = (-(tf.nn.sigmoid(x[..., self._dims - 1:self._dims]) *
                self._offset_scale + self._offset_lbound))
    blend_planes = (
        tf.nn.sigmoid(blend_terms[..., :n_parts]) * self._blend_scale +
        self._blend_lbound)

    # Norm of the boundary line
    norm_rad = tf.tanh(norm_logit) * np.pi  # [..., (azimuth, altitude)]
    if self._dims == 3:
        norm = tf.stack([
            tf.sin(norm_rad[..., 1]) * tf.cos(norm_rad[..., 0]),
            tf.sin(norm_rad[..., 1]) * tf.sin(norm_rad[..., 0]),
            tf.cos(norm_rad[..., 1])
        ], axis=-1)
    else:
        norm = tf.concat([tf.cos(norm_rad), tf.sin(norm_rad)], axis=-1)

    # Calculate signed distances to hyperplanes.
    points = (
        tf.expand_dims(points, axis=1) - tf.expand_dims(translations, axis=2))
    points = tf.expand_dims(points, axis=2)
    points = tf.tile(points, [1, 1, n_planes, 1, 1])
    signed_dis = tf.matmul(points, tf.expand_dims(norm, axis=-1))
    signed_dis = signed_dis + tf.expand_dims(offset, axis=-2)

    return signed_dis, translations, blend_planes, offset
Example #21
Source File: rnn.py From magenta with Apache License 2.0 | 5 votes |
def __call__(self, x, state, timestep=0, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        h, c = tf.split(state, 2, 1)

        h_size = self.num_units
        x_size = x.get_shape().as_list()[1]
        batch_size = x.get_shape().as_list()[0]

        w_init = None  # uniform
        h_init = lstm_ortho_initializer(1.0)

        w_xh = tf.get_variable(
            'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)

        concat = tf.concat([x, h], 1)  # concat for speed.
        w_full = tf.concat([w_xh, w_hh], 0)
        concat = tf.matmul(concat, w_full)  # + bias  # live life without garbage.

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        concat = layer_norm_all(concat, batch_size, 4, h_size, 'ln_all')
        i, j, f, o = tf.split(concat, 4, 1)

        if self.use_recurrent_dropout:
            g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
        else:
            g = tf.tanh(j)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(layer_norm(new_c, h_size, 'ln_c')) * tf.sigmoid(o)

        return new_h, tf.concat([new_h, new_c], 1)
Example #22
Source File: rnn.py From magenta with Apache License 2.0 | 5 votes |
def __call__(self, x, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
        c, h = tf.split(state, 2, 1)

        x_size = x.get_shape().as_list()[1]

        w_init = None  # uniform
        h_init = lstm_ortho_initializer(1.0)

        # Keep W_xh and W_hh separate here as well to use different init methods.
        w_xh = tf.get_variable(
            'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)
        bias = tf.get_variable(
            'bias', [4 * self.num_units],
            initializer=tf.constant_initializer(0.0))

        concat = tf.concat([x, h], 1)
        w_full = tf.concat([w_xh, w_hh], 0)
        hidden = tf.matmul(concat, w_full) + bias

        i, j, f, o = tf.split(hidden, 4, 1)

        if self.use_recurrent_dropout:
            g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
        else:
            g = tf.tanh(j)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        return new_h, tf.concat([new_c, new_h], 1)  # fuk tuples.
Example #23
Source File: modeling.py From albert with Apache License 2.0 | 5 votes |
def get_activation(activation_string):
    """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

    Args:
      activation_string: String name of the activation function.

    Returns:
      A Python function corresponding to the activation function. If
      `activation_string` is None, empty, or "linear", this will return None.
      If `activation_string` is not a string, it will return
      `activation_string`.

    Raises:
      ValueError: The `activation_string` does not correspond to a known
        activation.
    """
    # We assume that anything that's not a string is already an activation
    # function, so we just return it.
    if not isinstance(activation_string, six.string_types):
        return activation_string

    if not activation_string:
        return None

    act = activation_string.lower()
    if act == "linear":
        return None
    elif act == "relu":
        return tf.nn.relu
    elif act == "gelu":
        return gelu
    elif act == "tanh":
        return tf.tanh
    else:
        raise ValueError("Unsupported activation: %s" % act)
Example #24
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def gru_feedfwd(a_t, h_prev, filters, name=None):
    """position-wise Feed-fwd GRU gates following the MPNN.

    Args:
      a_t: Tensor of shape [batch, length, depth] of current input
      h_prev: Tensor of shape [batch, length, depth] of prev input
      filters: an integer specifying number of dimensions of the filters
      name: A string

    Returns:
      h_t: [batch, length, filters] hidden state
    """

    with tf.variable_scope(name, default_name="GRU", values=[a_t, h_prev]):
        # we use right matrix multiplication to handle batches
        # W_z and W_r have shape 2d, d. U_z U_r have shape d,d
        z_t = (
            tf.sigmoid(
                tpu_conv1d(a_t, filters, 1, padding="SAME", name="W_z") +
                tpu_conv1d(h_prev, filters, 1, padding="SAME", name="U_z")))
        r_t = (
            tf.sigmoid(
                tpu_conv1d(a_t, filters, 1, padding="SAME", name="W_r") +
                tpu_conv1d(h_prev, filters, 1, padding="SAME", name="U_r")))
        h_tilde = (
            tf.tanh(
                tpu_conv1d(a_t, filters, 1, padding="SAME", name="W") +
                tpu_conv1d(r_t * h_prev, filters, 1, padding="SAME", name="U")))
        h_t = (1. - z_t) * h_prev + z_t * h_tilde

    return h_t
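The three 1x1 convolutions above implement the usual GRU equations position-wise, with tanh producing the candidate state:

$$z_t = \sigma(W_z a_t + U_z h_{t-1}), \qquad r_t = \sigma(W_r a_t + U_r h_{t-1})$$
$$\tilde h_t = \tanh\!\bigl(W a_t + U (r_t \odot h_{t-1})\bigr), \qquad h_t = (1 - z_t)\odot h_{t-1} + z_t \odot \tilde h_t$$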
Example #25
Source File: rl.py From tensor2tensor with Apache License 2.0 | 5 votes |
def feed_forward_gaussian_fun(action_space, config, observations):
    """Feed-forward Gaussian."""
    if not isinstance(action_space, gym.spaces.box.Box):
        raise ValueError("Expecting continuous action space.")

    mean_weights_initializer = tf.initializers.variance_scaling(
        scale=config.init_mean_factor)
    logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)

    flat_observations = tf.reshape(observations, [
        tf.shape(observations)[0], tf.shape(observations)[1],
        functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)])

    with tf.variable_scope("network_parameters"):
        with tf.variable_scope("policy"):
            x = flat_observations
            for size in config.policy_layers:
                x = tf.layers.dense(x, size, activation=tf.nn.relu)
            mean = tf.layers.dense(
                x, action_space.shape[0], activation=tf.tanh,
                kernel_initializer=mean_weights_initializer)
            logstd = tf.get_variable(
                "logstd", mean.shape[2:], tf.float32, logstd_initializer)
            logstd = tf.tile(
                logstd[None, None],
                [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
        with tf.variable_scope("value"):
            x = flat_observations
            for size in config.value_layers:
                x = tf.layers.dense(x, size, activation=tf.nn.relu)
            value = tf.layers.dense(x, 1)[..., 0]

    mean = tf.check_numerics(mean, "mean")
    logstd = tf.check_numerics(logstd, "logstd")
    value = tf.check_numerics(value, "value")

    policy = tfp.distributions.MultivariateNormalDiag(mean, tf.exp(logstd))

    return NetworkOutput(policy, value, lambda a: tf.clip_by_value(a, -2., 2))
Example #26
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 5 votes |
def gated_linear_map(self, inputs, suffix, bias_start_reset, in_units,
                     out_units):
    """Linear mapping with two reset gates.

    Args:
      inputs: Input tensor
      suffix: Linear map name suffix
      bias_start_reset: Bias start value for reset gate
      in_units: Size of input tensor feature map count
      out_units: Size of output tensor feature map count

    Return:
      tf.Tensor: Convolution applied to input tensor
    """

    def reset_gate(name):
        prefix = self.prefix + name + suffix
        reset = conv_linear_map(inputs, in_units * 2, in_units * 2,
                                bias_start_reset, prefix)
        return tf.nn.sigmoid(reset)

    in_shape = [self.batch_size, self.length // 2, in_units * 2]
    inputs = tf.reshape(inputs, in_shape)

    reset1 = reset_gate("/reset1/")
    reset2 = reset_gate("/reset2/")
    res1 = conv_linear_map(inputs * reset1, in_units * 2, out_units, 0.0,
                           self.prefix + "/cand1/" + suffix)
    res2 = conv_linear_map(inputs * reset2, in_units * 2, out_units, 0.0,
                           self.prefix + "/cand2/" + suffix)

    res = tf.concat([res1, res2], axis=2)
    res = tf.reshape(res, [self.batch_size, self.length, out_units])
    return tf.nn.tanh(res)
Example #27
Source File: autoencoders.py From tensor2tensor with Apache License 2.0 | 5 votes |
def bottleneck(self, x):
    with tf.variable_scope("bottleneck"):
        hparams = self.hparams
        x = tf.layers.dense(x, hparams.bottleneck_bits, name="bottleneck")
        if hparams.mode == tf.estimator.ModeKeys.TRAIN:
            noise = 2.0 * tf.random_uniform(common_layers.shape_list(x)) - 1.0
            return tf.tanh(x) + noise * hparams.bottleneck_noise, 0.0
        return tf.tanh(x), 0.0
Example #28
Source File: autoencoders.py From tensor2tensor with Apache License 2.0 | 5 votes |
def bottleneck(self, x):
    hparams = self.hparams
    x = tf.tanh(tf.layers.dense(x, hparams.bottleneck_bits, name="bottleneck"))
    d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
    if hparams.mode == tf.estimator.ModeKeys.TRAIN:
        noise = tf.random_uniform(common_layers.shape_list(x))
        noise = 2.0 * tf.to_float(tf.less(hparams.bottleneck_noise, noise)) - 1.0
        d *= noise
    x = common_layers.mix(d, x, hparams.discretize_warmup_steps,
                          hparams.mode == tf.estimator.ModeKeys.TRAIN)
    return x, 0.0
Example #29
Source File: autoencoders.py From tensor2tensor with Apache License 2.0 | 5 votes |
def unstack(self, b, size, bottleneck_bits, name):
    with tf.variable_scope(name + "_unstack"):
        unb = self.unbottleneck(b, size)
        dec = self.decoder(unb)
        pred = tf.layers.dense(dec, bottleneck_bits, name="pred")
        pred_shape = common_layers.shape_list(pred)
        pred1 = tf.reshape(pred, pred_shape[:-1] + [-1, 2])
        x, y = tf.split(pred1, 2, axis=-1)
        x = tf.squeeze(x, axis=[-1])
        y = tf.squeeze(y, axis=[-1])
        gt = 2.0 * tf.to_float(tf.less(x, y)) - 1.0
        gtc = tf.tanh(y - x)
        gt += gtc - tf.stop_gradient(gtc)
        return gt, pred1
Example #30
Source File: modeling.py From albert with Apache License 2.0 | 5 votes |
def gelu(x):
    """Gaussian Error Linear Unit.

    This is a smoother version of the RELU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
      x: float Tensor to perform activation.

    Returns:
      `x` with the GELU activation applied.
    """
    cdf = 0.5 * (1.0 + tf.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
    return x * cdf