Python tensorflow.rsqrt() Examples
The following are 30 code examples of tensorflow.rsqrt(), the element-wise reciprocal square root: tf.rsqrt(x) computes 1 / sqrt(x). Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
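A minimal sketch of the op itself before the project examples (assuming a TensorFlow 1.x environment, which is the API style used throughout this page):

import tensorflow as tf

x = tf.constant([1.0, 4.0, 16.0])
y = tf.rsqrt(x)  # element-wise 1 / sqrt(x)

with tf.Session() as sess:
    print(sess.run(y))  # [1.   0.5  0.25]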
Example #1
Source File: common_layers.py From fine-lm with MIT License | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
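The helpers shape_list and cast_like above come from tensor2tensor's common_layers. As a simplified, self-contained restatement of the normalization core (illustrative shapes only, plain TF 1.x ops, not the tensor2tensor API): channels are split into groups, and moments are taken over height, width, and the channels inside each group.

import tensorflow as tf

x = tf.random_normal([2, 8, 8, 16])              # [batch, height, width, channels]
num_groups = 4
grouped = tf.reshape(x, [2, 8, 8, num_groups, 16 // num_groups])
mean, variance = tf.nn.moments(grouped, [1, 2, 4], keep_dims=True)
normed = (grouped - mean) * tf.rsqrt(variance + 1e-5)
y = tf.reshape(normed, [2, 8, 8, 16])            # learned scale and bias would be applied here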
Example #2
Source File: layer.py From UNMT-SPR with MIT License | 6 votes |
def layer_norm(input_data, epsilon=1e-6, dtype=None, scope=None):
    with tf.variable_scope(scope, default_name="layer_norm"):
        input_size = infer_shape(input_data)[-1]

        scale = tf.get_variable("scale", shape=[input_size],
                                initializer=tf.ones_initializer())
        bias = tf.get_variable("bias", shape=[input_size],
                               initializer=tf.zeros_initializer)

        mean = tf.reduce_mean(input_data, -1, True)
        variance = tf.reduce_mean(tf.square(input_data - mean), -1, True)

        input_norm = (input_data - mean) * tf.rsqrt(variance + epsilon)
        output = input_norm * scale + bias

        return output
Example #3
Source File: nn.py From THUMT with BSD 3-Clause "New" or "Revised" License | 6 votes |
def layer_norm(inputs, epsilon=1e-6, dtype=None, scope=None):
    """
    Layer Normalization
    :param inputs: A Tensor of shape [..., channel_size]
    :param epsilon: A floating number
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string
    :returns: A Tensor with the same shape as inputs
    """
    with tf.variable_scope(scope, default_name="layer_norm", values=[inputs],
                           dtype=dtype):
        channel_size = inputs.get_shape().as_list()[-1]

        scale = tf.get_variable("scale", shape=[channel_size],
                                initializer=tf.ones_initializer())
        offset = tf.get_variable("offset", shape=[channel_size],
                                 initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(inputs, -1, True)
        variance = tf.reduce_mean(tf.square(inputs - mean), -1, True)

        norm_inputs = (inputs - mean) * tf.rsqrt(variance + epsilon)

        return norm_inputs * scale + offset
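Examples #2 and #3 compute the same thing: subtract the per-feature mean, multiply by tf.rsqrt(variance + epsilon), then apply a learned scale and offset. A hand-checkable sketch of that formula with NumPy only (illustrative numbers, not from either project):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
mean = x.mean()                          # 2.5
var = ((x - mean) ** 2).mean()           # 1.25
norm = (x - mean) / np.sqrt(var + 1e-6)  # rsqrt(v) is simply 1 / sqrt(v)
print(norm)                              # approx [-1.342, -0.447, 0.447, 1.342]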
Example #4
Source File: layer_norm_residual_conn.py From BERT with Apache License 2.0 | 6 votes |
def layer_normalization(self, x):
    """
    x should be: [batch_size, sequence_length, d_model]
    :return:
    """
    filter = x.get_shape()[-1]  # last dimension of x. e.g. 512
    with tf.variable_scope("layer_normalization" + str(self.layer_index)):
        # 1. normalize input by using mean and variance according to last dimension
        mean = tf.reduce_mean(x, axis=-1, keepdims=True)  # [batch_size, sequence_length, 1]
        variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keepdims=True)  # [batch_size, sequence_length, 1]
        norm_x = (x - mean) * tf.rsqrt(variance + 1e-6)  # [batch_size, sequence_length, d_model]
        # 2. re-scale normalized input back
        scale = tf.get_variable("layer_norm_scale", [filter], initializer=tf.ones_initializer)  # [filter]
        # bias is zero-initialized so the layer starts as a pure scaled normalization
        bias = tf.get_variable("layer_norm_bias", [filter], initializer=tf.zeros_initializer)  # [filter]
        output = norm_x * scale + bias  # [batch_size, sequence_length, d_model]
        return output  # [batch_size, sequence_length, d_model]
Example #5
Source File: common_attention.py From fine-lm with MIT License | 6 votes |
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
  """Scaled dot-product attention. One head. One spatial dimension.

  Args:
    q: a Tensor with shape [batch, length_q, depth_k]
    k: a Tensor with shape [batch, length_kv, depth_k]
    v: a Tensor with shape [batch, length_kv, depth_v]
    bias: optional Tensor broadcastable to [batch, length_q, length_kv]
    name: an optional string

  Returns:
    A Tensor.
  """
  with tf.variable_scope(
      name, default_name="scaled_dot_product_attention_simple"):
    scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
    logits = tf.matmul(q * scalar, k, transpose_b=True)
    if bias is not None:
      logits += bias
    weights = tf.nn.softmax(logits, name="attention_weights")
    if common_layers.should_generate_summaries():
      tf.summary.image(
          "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
    return tf.matmul(weights, v)
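The tf.rsqrt call above implements the 1/sqrt(depth_k) scaling of scaled dot-product attention: queries are scaled before the matmul so the logits do not grow with the key depth. A minimal sketch with concrete shapes (plain TF 1.x ops; the shapes are illustrative, not from the source):

import tensorflow as tf

q = tf.random_normal([1, 5, 64])   # [batch, length_q, depth_k]
k = tf.random_normal([1, 7, 64])   # [batch, length_kv, depth_k]
v = tf.random_normal([1, 7, 32])   # [batch, length_kv, depth_v]

scalar = tf.rsqrt(tf.to_float(tf.shape(q)[2]))        # 1 / sqrt(64) = 0.125
logits = tf.matmul(q * scalar, k, transpose_b=True)   # [1, 5, 7]
weights = tf.nn.softmax(logits)
context = tf.matmul(weights, v)                        # [1, 5, 32]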
Example #6
Source File: diet.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def diet_expert(x, hidden_size, params):
  """A two-layer feed-forward network with relu activation on hidden layer.

  Uses diet variables.
  Recomputes hidden layer on backprop to save activation memory.

  Args:
    x: a Tensor with shape [batch, io_size]
    hidden_size: an integer
    params: a diet variable HParams object.

  Returns:
    a Tensor with shape [batch, io_size]
  """

  @fn_with_diet_vars(params)
  def diet_expert_internal(x):
    dim = x.get_shape().as_list()[-1]
    h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
    y = tf.layers.dense(h, dim, use_bias=False)
    y *= tf.rsqrt(tf.to_float(dim * hidden_size))
    return y

  return diet_expert_internal(x)
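The final rescaling y *= tf.rsqrt(tf.to_float(dim * hidden_size)) shrinks the output by 1/sqrt(dim * hidden_size). Evaluated in plain Python for hypothetical sizes (not values taken from the source):

dim, hidden_size = 512, 2048
scale = (dim * hidden_size) ** -0.5
print(scale)  # 0.0009765625, i.e. exactly 1/1024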
Example #7
Source File: common_layers.py From BERT with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
Example #8
Source File: common_layers.py From BERT with Apache License 2.0 | 6 votes |
def layer_norm_compute(x, epsilon, scale, bias, layer_collection=None):
  """Layer norm raw computation."""

  # Save these before they get converted to tensors by the casting below
  params = (scale, bias)

  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
  variance = tf.reduce_mean(
      tf.squared_difference(x, mean), axis=[-1], keepdims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)

  output = norm_x * scale + bias
  return output
Example #9
Source File: learning_rate.py From fine-lm with MIT License | 6 votes |
def learning_rate_factor(name, step_num, hparams):
  """Compute the designated learning rate factor from hparams."""
  if name == "constant":
    tf.logging.info("Base learning rate: %f", hparams.learning_rate_constant)
    return hparams.learning_rate_constant
  elif name == "linear_warmup":
    return tf.minimum(1.0, step_num / hparams.learning_rate_warmup_steps)
  elif name == "linear_decay":
    ret = (hparams.train_steps - step_num) / hparams.learning_rate_decay_steps
    return tf.minimum(1.0, tf.maximum(0.0, ret))
  elif name == "rsqrt_decay":
    return tf.rsqrt(tf.maximum(step_num, hparams.learning_rate_warmup_steps))
  elif name == "rsqrt_hidden_size":
    return hparams.hidden_size ** -0.5
  elif name == "legacy":
    return legacy_learning_rate_schedule(hparams)
  else:
    raise ValueError("unknown learning rate factor %s" % name)
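The "rsqrt_decay" branch returns 1/sqrt(max(step, warmup_steps)), so the factor is held flat during warmup and then decays with the inverse square root of the step. Evaluated in plain Python for a hypothetical warmup_steps=10000 (not an hparams value from the source):

warmup_steps = 10000
for step in (1000, 10000, 40000, 90000):
    print(step, max(step, warmup_steps) ** -0.5)
# 1000   0.01       (still clamped to the warmup value)
# 10000  0.01
# 40000  0.005
# 90000  0.00333...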
Example #10
Source File: diet.py From fine-lm with MIT License | 6 votes |
def diet_expert(x, hidden_size, params):
  """A two-layer feed-forward network with relu activation on hidden layer.

  Uses diet variables.
  Recomputes hidden layer on backprop to save activation memory.

  Args:
    x: a Tensor with shape [batch, io_size]
    hidden_size: an integer
    params: a diet variable HParams object.

  Returns:
    a Tensor with shape [batch, io_size]
  """

  @fn_with_diet_vars(params)
  def diet_expert_internal(x):
    dim = x.get_shape().as_list()[-1]
    h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
    y = tf.layers.dense(h, dim, use_bias=False)
    y *= tf.rsqrt(tf.to_float(dim * hidden_size))
    return y

  return diet_expert_internal(x)
Example #11
Source File: ops.py From SSGAN-Tensorflow with MIT License | 6 votes |
def instance_norm(input):
    """ Instance normalization """
    with tf.variable_scope('instance_norm'):
        num_out = input.get_shape()[-1]
        scale = tf.get_variable(
            'scale', [num_out],
            initializer=tf.random_normal_initializer(mean=1.0, stddev=0.02))
        offset = tf.get_variable(
            'offset', [num_out],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=0.02))
        mean, var = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-6
        inv = tf.rsqrt(var + epsilon)
        return scale * (input - mean) * inv + offset
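Instance normalization takes its moments per example and per channel, over the spatial axes only. A minimal sketch of just that statistics step (illustrative shapes, plain TF 1.x ops):

import tensorflow as tf

imgs = tf.random_normal([4, 32, 32, 3])                       # [batch, H, W, C]
mean, var = tf.nn.moments(imgs, axes=[1, 2], keep_dims=True)  # both [4, 1, 1, 3]
normalized = (imgs - mean) * tf.rsqrt(var + 1e-6)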
Example #12
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
Example #13
Source File: common_attention.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
  """Scaled dot-product attention. One head. One spatial dimension.

  Args:
    q: a Tensor with shape [batch, length_q, depth_k]
    k: a Tensor with shape [batch, length_kv, depth_k]
    v: a Tensor with shape [batch, length_kv, depth_v]
    bias: optional Tensor broadcastable to [batch, length_q, length_kv]
    name: an optional string

  Returns:
    A Tensor.
  """
  with tf.variable_scope(
      name, default_name="scaled_dot_product_attention_simple"):
    scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
    logits = tf.matmul(q * scalar, k, transpose_b=True)
    if bias is not None:
      logits += bias
    weights = tf.nn.softmax(logits, name="attention_weights")
    if common_layers.should_generate_summaries():
      tf.summary.image(
          "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
    return tf.matmul(weights, v)
Example #14
Source File: diet.py From BERT with Apache License 2.0 | 6 votes |
def diet_expert(x, hidden_size, params):
  """A two-layer feed-forward network with relu activation on hidden layer.

  Uses diet variables.
  Recomputes hidden layer on backprop to save activation memory.

  Args:
    x: a Tensor with shape [batch, io_size]
    hidden_size: an integer
    params: a diet variable HParams object.

  Returns:
    a Tensor with shape [batch, io_size]
  """

  @fn_with_diet_vars(params)
  def diet_expert_internal(x):
    dim = x.get_shape().as_list()[-1]
    h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
    y = tf.layers.dense(h, dim, use_bias=False)
    y *= tf.rsqrt(tf.to_float(dim * hidden_size))
    return y

  return diet_expert_internal(x)
Example #15
Source File: train.py From finetune-transformer-lm with MIT License | 5 votes |
def _attn(q, k, v, train=False, scale=False):
    w = tf.matmul(q, k)

    if scale:
        n_state = shape_list(v)[-1]
        w = w * tf.rsqrt(tf.cast(n_state, tf.float32))

    w = mask_attn_weights(w)
    w = tf.nn.softmax(w)

    w = dropout(w, attn_pdrop, train)

    a = tf.matmul(w, v)
    return a
Example #16
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def l2_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None):
  """Layer normalization with l2 norm."""
  if filters is None:
    filters = shape_list(x)[-1]
  with tf.variable_scope(name, default_name="l2_norm", values=[x], reuse=reuse):
    scale = tf.get_variable(
        "l2_norm_scale", [filters], initializer=tf.ones_initializer())
    bias = tf.get_variable(
        "l2_norm_bias", [filters], initializer=tf.zeros_initializer())
    epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
    mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    l2norm = tf.reduce_sum(tf.square(x - mean), axis=[-1], keepdims=True)
    norm_x = (x - mean) * tf.rsqrt(l2norm + epsilon)
    return norm_x * scale + bias
Example #17
Source File: adafactor.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def _learning_rate_default(self, multiply_by_parameter_scale):
  learning_rate = tf.minimum(tf.rsqrt(step_num() + 1.0), 0.01)
  if not multiply_by_parameter_scale:
    learning_rate *= 0.05
  return learning_rate
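The default Adafactor learning rate above is min(1/sqrt(step + 1), 0.01), multiplied by a further 0.05 when parameter scaling is disabled. Evaluated in plain Python for a few step counts (illustrative only):

for step in (100, 10000, 1000000):
    print(step, min((step + 1.0) ** -0.5, 0.01))
# 100      0.01          (capped by the 0.01 ceiling)
# 10000    ~0.0099995
# 1000000  ~0.0010000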
Example #18
Source File: learning_rate.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def learning_rate_factor(name, step_num, hparams):
  """Compute the designated learning rate factor from hparams."""
  if name == "constant":
    tf.logging.info("Base learning rate: %f", hparams.learning_rate_constant)
    return hparams.learning_rate_constant
  elif name == "linear_warmup":
    return tf.minimum(1.0, step_num / hparams.learning_rate_warmup_steps)
  elif name == "linear_decay":
    ret = (hparams.train_steps - step_num) / hparams.learning_rate_decay_steps
    return tf.minimum(1.0, tf.maximum(0.0, ret))
  elif name == "cosdecay":  # openai gpt
    in_warmup = tf.cast(step_num <= hparams.learning_rate_warmup_steps,
                        dtype=tf.float32)
    ret = 0.5 * (1 + tf.cos(
        np.pi * step_num / hparams.learning_rate_decay_steps))
    # if in warmup stage return 1 else return the decayed value
    return in_warmup * 1 + (1 - in_warmup) * ret
  elif name == "rsqrt_decay":
    return tf.rsqrt(tf.maximum(step_num, hparams.learning_rate_warmup_steps))
  elif name == "rsqrt_normalized_decay":
    scale = tf.sqrt(tf.to_float(hparams.learning_rate_warmup_steps))
    return scale * tf.rsqrt(tf.maximum(
        step_num, hparams.learning_rate_warmup_steps))
  elif name == "exp_decay":
    decay_steps = hparams.learning_rate_decay_steps
    warmup_steps = hparams.learning_rate_warmup_steps
    p = (step_num - warmup_steps) / decay_steps
    p = tf.maximum(p, 0.)
    if hparams.learning_rate_decay_staircase:
      p = tf.floor(p)
    return tf.pow(hparams.learning_rate_decay_rate, p)
  elif name == "rsqrt_hidden_size":
    return hparams.hidden_size ** -0.5
  elif name == "legacy":
    return legacy_learning_rate_schedule(hparams)
  else:
    raise ValueError("unknown learning rate factor %s" % name)
Example #19
Source File: ops.py From PEPSI-Fast_image_inpainting_with_parallel_decoding_network with MIT License | 5 votes |
def instance_norm(input, name="instance_norm"):
    with tf.variable_scope(name):
        depth = input.get_shape()[3]
        scale = tf.get_variable(
            "scale", [depth],
            initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
        offset = tf.get_variable(
            "offset", [depth], initializer=tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-5
        inv = tf.rsqrt(variance + epsilon)
        normalized = (input - mean) * inv
        return scale * normalized + offset
Example #20
Source File: models.py From realmix with Apache License 2.0 | 5 votes |
def classifier(self, x, scales, filters, repeat, training, getter=None, **kwargs):
    del kwargs
    leaky_relu = functools.partial(tf.nn.leaky_relu, alpha=0.1)
    bn_args = dict(training=training, momentum=0.999)

    def conv_args(k, f):
        return dict(padding='same',
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=tf.rsqrt(0.5 * k * k * f)))

    def residual(x0, filters, stride=1, activate_before_residual=False):
        x = leaky_relu(tf.layers.batch_normalization(x0, **bn_args))
        if activate_before_residual:
            x0 = x

        x = tf.layers.conv2d(x, filters, 3, strides=stride, **conv_args(3, filters))
        x = leaky_relu(tf.layers.batch_normalization(x, **bn_args))
        x = tf.layers.conv2d(x, filters, 3, **conv_args(3, filters))

        if x0.get_shape()[3] != filters:
            x0 = tf.layers.conv2d(x0, filters, 1, strides=stride, **conv_args(1, filters))

        return x0 + x

    with tf.variable_scope('classify', reuse=tf.AUTO_REUSE, custom_getter=getter):
        y = tf.layers.conv2d((x - self.dataset.mean) / self.dataset.std, 16, 3,
                             **conv_args(3, 16))
        for scale in range(scales):
            y = residual(y, filters << scale, stride=2 if scale else 1,
                         activate_before_residual=scale == 0)
            for i in range(repeat - 1):
                y = residual(y, filters << scale)

        y = leaky_relu(tf.layers.batch_normalization(y, **bn_args))
        y = tf.reduce_mean(y, [1, 2])
        logits = tf.layers.dense(y, self.nclass,
                                 kernel_initializer=tf.glorot_normal_initializer())
    return logits
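Here tf.rsqrt sets the initializer's standard deviation: stddev = rsqrt(0.5 * k * k * f) is the same as sqrt(2 / (k * k * f)), the familiar sqrt(2/fan) scaling used with ReLU-style activations. Evaluated for a 3x3 convolution with 16 filters (illustrative numbers only):

k, f = 3, 16
stddev = (0.5 * k * k * f) ** -0.5
print(stddev)  # approx 0.1179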
Example #21
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def layer_norm_compute(x, epsilon, scale, bias):
  """Layer norm raw computation."""
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
  variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keepdims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return norm_x * scale + bias
Example #22
Source File: ops.py From tfdeploy with MIT License | 5 votes |
def test_Rsqrt(self):
    t = tf.rsqrt(self.random(4, 3))
    self.check(t)
Example #23
Source File: networks.py From bangla-tts with GNU General Public License v3.0 | 5 votes |
def Attention(Q, K, V, mononotic_attention=False, prev_max_attentions=None):
    '''
    Args:
      Q: Queries. (B, T/r, d)
      K: Keys. (B, N, d)
      V: Values. (B, N, d)
      mononotic_attention: A boolean. At training, it is False.
      prev_max_attentions: (B,). At training, it is set to None.

    Returns:
      R: [Context Vectors; Q]. (B, T/r, 2d)
      alignments: (B, N, T/r)
      max_attentions: (B, T/r)
    '''
    A = tf.matmul(Q, K, transpose_b=True) * tf.rsqrt(tf.to_float(d))
    if mononotic_attention:  # for inference
        key_masks = tf.sequence_mask(prev_max_attentions, max_N)
        reverse_masks = tf.sequence_mask(
            max_N - attention_win_size - prev_max_attentions, max_N)[:, ::-1]
        masks = tf.logical_or(key_masks, reverse_masks)
        masks = tf.tile(tf.expand_dims(masks, 1), [1, max_T, 1])
        paddings = tf.ones_like(A) * (-2 ** 32 + 1)  # (B, T/r, N)
        A = tf.where(tf.equal(masks, False), A, paddings)
    A = tf.nn.softmax(A)  # (B, T/r, N)
    max_attentions = tf.argmax(A, -1)  # (B, T/r)
    R = tf.matmul(A, V)
    R = tf.concat((R, Q), -1)
    alignments = tf.transpose(A, [0, 2, 1])  # (B, N, T/r)

    return R, alignments, max_attentions
Example #24
Source File: layers.py From Question_Answering_Models with MIT License | 5 votes |
def layer_norm_compute_python(x, epsilon, scale, bias):
    """Layer norm raw computation."""
    mean = tf.reduce_mean(x, axis=[-1], keep_dims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keep_dims=True)
    norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
    return norm_x * scale + bias
Example #25
Source File: func.py From zero with BSD 3-Clause "New" or "Revised" License | 5 votes |
def rms_norm(x, eps=None, scope=None):
    """RMS-based Layer normalization layer"""
    if eps is None:
        eps = dtype.epsilon()
    with tf.variable_scope(scope or "rms_norm",
                           dtype=tf.as_dtype(dtype.floatx())):
        layer_size = util.shape_list(x)[-1]

        scale = tf.get_variable("scale", [layer_size],
                                initializer=tf.ones_initializer())

        ms = tf.reduce_mean(x ** 2, -1, keep_dims=True)

        return scale * x * tf.rsqrt(ms + eps)
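Unlike the layer norm in the next example, rms_norm does not subtract the mean: each vector is divided by its root mean square and only a learned scale is applied. A hand-checkable NumPy sketch of that formula (illustrative numbers):

import numpy as np

x = np.array([3.0, 4.0])
ms = (x ** 2).mean()              # (9 + 16) / 2 = 12.5
print(x / np.sqrt(ms + 1e-8))     # approx [0.8485, 1.1314]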
Example #26
Source File: func.py From zero with BSD 3-Clause "New" or "Revised" License | 5 votes |
def layer_norm(x, eps=None, scope=None):
    """Layer normalization layer"""
    if eps is None:
        eps = dtype.epsilon()
    with tf.variable_scope(scope or "layer_norm",
                           dtype=tf.as_dtype(dtype.floatx())):
        layer_size = util.shape_list(x)[-1]

        scale = tf.get_variable("scale", [layer_size],
                                initializer=tf.ones_initializer())
        offset = tf.get_variable("offset", [layer_size],
                                 initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(x, -1, keep_dims=True)
        var = tf.reduce_mean((x - mean) ** 2, -1, keep_dims=True)

        return scale * (x - mean) * tf.rsqrt(var + eps) + offset
Example #27
Source File: gpt_utils.py From BERT with Apache License 2.0 | 5 votes |
def norm(x, scope, axis=-1, epsilon=1e-5):
    """Normalize to mean = 0, std = 1, then do a diagonal affine transform."""
    with tf.variable_scope(scope):
        n_state = x.shape[-1].value
        g = tf.get_variable('g', [n_state], initializer=tf.constant_initializer(1))
        b = tf.get_variable('b', [n_state], initializer=tf.constant_initializer(0))
        u = tf.reduce_mean(x, axis=axis, keepdims=True)
        s = tf.reduce_mean(tf.square(x - u), axis=axis, keepdims=True)
        x = (x - u) * tf.rsqrt(s + epsilon)
        x = x * g + b
        return x
Example #28
Source File: speech_recognition.py From BERT with Apache License 2.0 | 5 votes |
def preprocess_example(self, example, mode, hparams):
  p = hparams
  if p.audio_preproc_in_bottom:
    example["inputs"] = tf.expand_dims(
        tf.expand_dims(example["waveforms"], -1), -1)
  else:
    waveforms = tf.expand_dims(example["waveforms"], 0)
    mel_fbanks = common_audio.compute_mel_filterbank_features(
        waveforms,
        sample_rate=p.audio_sample_rate,
        dither=p.audio_dither,
        preemphasis=p.audio_preemphasis,
        frame_length=p.audio_frame_length,
        frame_step=p.audio_frame_step,
        lower_edge_hertz=p.audio_lower_edge_hertz,
        upper_edge_hertz=p.audio_upper_edge_hertz,
        num_mel_bins=p.audio_num_mel_bins,
        apply_mask=False)
    if p.audio_add_delta_deltas:
      mel_fbanks = common_audio.add_delta_deltas(mel_fbanks)
    fbank_size = common_layers.shape_list(mel_fbanks)
    assert fbank_size[0] == 1
    # This replaces CMVN estimation on data
    var_epsilon = 1e-09
    mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
    variance = tf.reduce_mean(tf.squared_difference(mel_fbanks, mean),
                              keepdims=True, axis=1)
    mel_fbanks = (mel_fbanks - mean) * tf.rsqrt(variance + var_epsilon)
    # Later models like to flatten the two spatial dims. Instead, we add a
    # unit spatial dim and flatten the frequencies and channels.
    example["inputs"] = tf.concat([
        tf.reshape(mel_fbanks, [fbank_size[1], fbank_size[2], fbank_size[3]]),
        tf.zeros((p.num_zeropad_frames, fbank_size[2], fbank_size[3]))], 0)
  if not p.audio_keep_example_waveforms:
    del example["waveforms"]
  return super(SpeechRecognitionProblem, self
               ).preprocess_example(example, mode, hparams)
Example #29
Source File: latent_layers.py From BERT with Apache License 2.0 | 5 votes |
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
  """Latent prediction and loss.

  Args:
    latents_pred: Tensor of shape [..., depth].
    latents_discrete_hot: Tensor of shape [..., vocab_size].
    vocab_size: an int representing the vocab size.
    hparams: HParams.

  Returns:
    sample: Tensor of shape [...], a sample from a multinomial distribution.
    loss: Tensor of shape [...], the softmax cross-entropy.
  """
  with tf.variable_scope("latent_logits"):
    latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                     name="logits_dense")
    if hparams.logit_normalization:
      latents_logits *= tf.rsqrt(
          1e-8 + tf.reduce_mean(tf.square(latents_logits)))
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=latents_discrete_hot, logits=latents_logits)

    # TODO(trandustin): tease this out from ae_latent_softmax.
    # we use just the loss portion to anchor prior / encoder on text.
    sample = multinomial_sample(latents_logits, vocab_size,
                                hparams.sampling_method,
                                hparams.sampling_temp)
    return sample, loss
Example #30
Source File: common_layers.py From BERT with Apache License 2.0 | 5 votes |
def standardize_images(x):
  """Image standardization on batches and videos."""
  with tf.name_scope("standardize_images", values=[x]):
    x_shape = shape_list(x)
    x = to_float(tf.reshape(x, [-1] + x_shape[-3:]))
    x_mean = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    x_variance = tf.reduce_mean(
        tf.squared_difference(x, x_mean), axis=[1, 2], keepdims=True)
    num_pixels = to_float(x_shape[-2] * x_shape[-3])
    x = (x - x_mean) / tf.maximum(tf.sqrt(x_variance), tf.rsqrt(num_pixels))
    return tf.reshape(x, x_shape)