Python tensorflow.rsqrt() Examples
The following are 30 code examples of tensorflow.rsqrt(), the element-wise reciprocal square root: tf.rsqrt(x) computes 1 / sqrt(x). Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
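A minimal sketch of the op itself before the project examples (assuming a TensorFlow 1.x environment, which is the API style used throughout this page):

import tensorflow as tf

x = tf.constant([1.0, 4.0, 16.0])
y = tf.rsqrt(x)  # element-wise 1 / sqrt(x)

with tf.Session() as sess:
    print(sess.run(y))  # [1.   0.5  0.25]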
Example #1
Source File: common_layers.py From fine-lm with MIT License | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
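The helpers shape_list and cast_like above come from tensor2tensor's common_layers. As a simplified, self-contained restatement of the normalization core (illustrative shapes only, plain TF 1.x ops, not the tensor2tensor API): channels are split into groups, and moments are taken over height, width, and the channels inside each group.

import tensorflow as tf

x = tf.random_normal([2, 8, 8, 16])              # [batch, height, width, channels]
num_groups = 4
grouped = tf.reshape(x, [2, 8, 8, num_groups, 16 // num_groups])
mean, variance = tf.nn.moments(grouped, [1, 2, 4], keep_dims=True)
normed = (grouped - mean) * tf.rsqrt(variance + 1e-5)
y = tf.reshape(normed, [2, 8, 8, 16])            # learned scale and bias would be applied here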
Example #2
Source File: layer.py From UNMT-SPR with MIT License | 6 votes |
def layer_norm(input_data, epsilon=1e-6, dtype=None, scope=None):
    with tf.variable_scope(scope, default_name="layer_norm"):
        input_size = infer_shape(input_data)[-1]

        scale = tf.get_variable("scale", shape=[input_size],
                                initializer=tf.ones_initializer())
        bias = tf.get_variable("bias", shape=[input_size],
                               initializer=tf.zeros_initializer)

        mean = tf.reduce_mean(input_data, -1, True)
        variance = tf.reduce_mean(tf.square(input_data - mean), -1, True)

        input_norm = (input_data - mean) * tf.rsqrt(variance + epsilon)
        output = input_norm * scale + bias

        return output
Example #3
Source File: nn.py From THUMT with BSD 3-Clause "New" or "Revised" License | 6 votes |
def layer_norm(inputs, epsilon=1e-6, dtype=None, scope=None):
    """
    Layer Normalization
    :param inputs: A Tensor of shape [..., channel_size]
    :param epsilon: A floating number
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string
    :returns: A Tensor with the same shape as inputs
    """
    with tf.variable_scope(scope, default_name="layer_norm", values=[inputs],
                           dtype=dtype):
        channel_size = inputs.get_shape().as_list()[-1]

        scale = tf.get_variable("scale", shape=[channel_size],
                                initializer=tf.ones_initializer())
        offset = tf.get_variable("offset", shape=[channel_size],
                                 initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(inputs, -1, True)
        variance = tf.reduce_mean(tf.square(inputs - mean), -1, True)

        norm_inputs = (inputs - mean) * tf.rsqrt(variance + epsilon)

        return norm_inputs * scale + offset
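Examples #2 and #3 compute the same thing: subtract the per-feature mean, multiply by tf.rsqrt(variance + epsilon), then apply a learned scale and offset. A hand-checkable sketch of that formula with NumPy only (illustrative numbers, not from either project):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
mean = x.mean()                          # 2.5
var = ((x - mean) ** 2).mean()           # 1.25
norm = (x - mean) / np.sqrt(var + 1e-6)  # rsqrt(v) is simply 1 / sqrt(v)
print(norm)                              # approx [-1.342, -0.447, 0.447, 1.342]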
Example #4
Source File: layer_norm_residual_conn.py From BERT with Apache License 2.0 | 6 votes |
def layer_normalization(self, x):
    """
    x should be: [batch_size, sequence_length, d_model]
    :return:
    """
    filter = x.get_shape()[-1]  # last dimension of x. e.g. 512
    with tf.variable_scope("layer_normalization" + str(self.layer_index)):
        # 1. normalize input by using mean and variance according to last dimension
        mean = tf.reduce_mean(x, axis=-1, keepdims=True)  # [batch_size, sequence_length, 1]
        variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keepdims=True)  # [batch_size, sequence_length, 1]
        norm_x = (x - mean) * tf.rsqrt(variance + 1e-6)  # [batch_size, sequence_length, d_model]
        # 2. re-scale normalized input back
        scale = tf.get_variable("layer_norm_scale", [filter], initializer=tf.ones_initializer)  # [filter]
        # bias is zero-initialized so the layer starts as a pure scaled normalization
        bias = tf.get_variable("layer_norm_bias", [filter], initializer=tf.zeros_initializer)  # [filter]
        output = norm_x * scale + bias  # [batch_size, sequence_length, d_model]
        return output  # [batch_size, sequence_length, d_model]
Example #5
Source File: common_attention.py From fine-lm with MIT License | 6 votes |
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
  """Scaled dot-product attention. One head. One spatial dimension.

  Args:
    q: a Tensor with shape [batch, length_q, depth_k]
    k: a Tensor with shape [batch, length_kv, depth_k]
    v: a Tensor with shape [batch, length_kv, depth_v]
    bias: optional Tensor broadcastable to [batch, length_q, length_kv]
    name: an optional string

  Returns:
    A Tensor.
  """
  with tf.variable_scope(
      name, default_name="scaled_dot_product_attention_simple"):
    scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
    logits = tf.matmul(q * scalar, k, transpose_b=True)
    if bias is not None:
      logits += bias
    weights = tf.nn.softmax(logits, name="attention_weights")
    if common_layers.should_generate_summaries():
      tf.summary.image(
          "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
    return tf.matmul(weights, v)
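The tf.rsqrt call above implements the 1/sqrt(depth_k) scaling of scaled dot-product attention: queries are scaled before the matmul so the logits do not grow with the key depth. A minimal sketch with concrete shapes (plain TF 1.x ops; the shapes are illustrative, not from the source):

import tensorflow as tf

q = tf.random_normal([1, 5, 64])   # [batch, length_q, depth_k]
k = tf.random_normal([1, 7, 64])   # [batch, length_kv, depth_k]
v = tf.random_normal([1, 7, 32])   # [batch, length_kv, depth_v]

scalar = tf.rsqrt(tf.to_float(tf.shape(q)[2]))        # 1 / sqrt(64) = 0.125
logits = tf.matmul(q * scalar, k, transpose_b=True)   # [1, 5, 7]
weights = tf.nn.softmax(logits)
context = tf.matmul(weights, v)                        # [1, 5, 32]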
Example #6
Source File: diet.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def diet_expert(x, hidden_size, params):
  """A two-layer feed-forward network with relu activation on hidden layer.

  Uses diet variables.
  Recomputes hidden layer on backprop to save activation memory.

  Args:
    x: a Tensor with shape [batch, io_size]
    hidden_size: an integer
    params: a diet variable HParams object.

  Returns:
    a Tensor with shape [batch, io_size]
  """

  @fn_with_diet_vars(params)
  def diet_expert_internal(x):
    dim = x.get_shape().as_list()[-1]
    h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
    y = tf.layers.dense(h, dim, use_bias=False)
    y *= tf.rsqrt(tf.to_float(dim * hidden_size))
    return y

  return diet_expert_internal(x)
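The final rescaling y *= tf.rsqrt(tf.to_float(dim * hidden_size)) shrinks the output by 1/sqrt(dim * hidden_size). Evaluated in plain Python for hypothetical sizes (not values taken from the source):

dim, hidden_size = 512, 2048
scale = (dim * hidden_size) ** -0.5
print(scale)  # 0.0009765625, i.e. exactly 1/1024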
Example #7
Source File: common_layers.py From BERT with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
Example #8
Source File: common_layers.py From BERT with Apache License 2.0 | 6 votes |
def layer_norm_compute(x, epsilon, scale, bias, layer_collection=None):
  """Layer norm raw computation."""

  # Save these before they get converted to tensors by the casting below
  params = (scale, bias)

  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
  variance = tf.reduce_mean(
      tf.squared_difference(x, mean), axis=[-1], keepdims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)

  output = norm_x * scale + bias
  return output
Example #9
Source File: learning_rate.py From fine-lm with MIT License | 6 votes |
def learning_rate_factor(name, step_num, hparams):
  """Compute the designated learning rate factor from hparams."""
  if name == "constant":
    tf.logging.info("Base learning rate: %f", hparams.learning_rate_constant)
    return hparams.learning_rate_constant
  elif name == "linear_warmup":
    return tf.minimum(1.0, step_num / hparams.learning_rate_warmup_steps)
  elif name == "linear_decay":
    ret = (hparams.train_steps - step_num) / hparams.learning_rate_decay_steps
    return tf.minimum(1.0, tf.maximum(0.0, ret))
  elif name == "rsqrt_decay":
    return tf.rsqrt(tf.maximum(step_num, hparams.learning_rate_warmup_steps))
  elif name == "rsqrt_hidden_size":
    return hparams.hidden_size ** -0.5
  elif name == "legacy":
    return legacy_learning_rate_schedule(hparams)
  else:
    raise ValueError("unknown learning rate factor %s" % name)
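The "rsqrt_decay" branch returns 1/sqrt(max(step, warmup_steps)), so the factor is held flat during warmup and then decays with the inverse square root of the step. Evaluated in plain Python for a hypothetical warmup_steps=10000 (not an hparams value from the source):

warmup_steps = 10000
for step in (1000, 10000, 40000, 90000):
    print(step, max(step, warmup_steps) ** -0.5)
# 1000   0.01       (still clamped to the warmup value)
# 10000  0.01
# 40000  0.005
# 90000  0.00333...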
Example #10
Source File: diet.py From fine-lm with MIT License | 6 votes |
def diet_expert(x, hidden_size, params):
  """A two-layer feed-forward network with relu activation on hidden layer.

  Uses diet variables.
  Recomputes hidden layer on backprop to save activation memory.

  Args:
    x: a Tensor with shape [batch, io_size]
    hidden_size: an integer
    params: a diet variable HParams object.

  Returns:
    a Tensor with shape [batch, io_size]
  """

  @fn_with_diet_vars(params)
  def diet_expert_internal(x):
    dim = x.get_shape().as_list()[-1]
    h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
    y = tf.layers.dense(h, dim, use_bias=False)
    y *= tf.rsqrt(tf.to_float(dim * hidden_size))
    return y

  return diet_expert_internal(x)
Example #11
Source File: ops.py From SSGAN-Tensorflow with MIT License | 6 votes |
def instance_norm(input):
    """ Instance normalization """
    with tf.variable_scope('instance_norm'):
        num_out = input.get_shape()[-1]
        scale = tf.get_variable(
            'scale', [num_out],
            initializer=tf.random_normal_initializer(mean=1.0, stddev=0.02))
        offset = tf.get_variable(
            'offset', [num_out],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=0.02))
        mean, var = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-6
        inv = tf.rsqrt(var + epsilon)
        return scale * (input - mean) * inv + offset
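Instance normalization takes its moments per example and per channel, over the spatial axes only. A minimal sketch of just that statistics step (illustrative shapes, plain TF 1.x ops):

import tensorflow as tf

imgs = tf.random_normal([4, 32, 32, 3])                       # [batch, H, W, C]
mean, var = tf.nn.moments(imgs, axes=[1, 2], keep_dims=True)  # both [4, 1, 1, 3]
normalized = (imgs - mean) * tf.rsqrt(var + 1e-6)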
Example #12
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
Example #13
Source File: common_attention.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
  """Scaled dot-product attention. One head. One spatial dimension.

  Args:
    q: a Tensor with shape [batch, length_q, depth_k]
    k: a Tensor with shape [batch, length_kv, depth_k]
    v: a Tensor with shape [batch, length_kv, depth_v]
    bias: optional Tensor broadcastable to [batch, length_q, length_kv]
    name: an optional string

  Returns:
    A Tensor.
  """
  with tf.variable_scope(
      name, default_name="scaled_dot_product_attention_simple"):
    scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
    logits = tf.matmul(q * scalar, k, transpose_b=True)
    if bias is not None:
      logits += bias
    weights = tf.nn.softmax(logits, name="attention_weights")
    if common_layers.should_generate_summaries():
      tf.summary.image(
          "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
    return tf.matmul(weights, v)
Example #14
Source File: diet.py From BERT with Apache License 2.0 | 6 votes |
def diet_expert(x, hidden_size, params):
  """A two-layer feed-forward network with relu activation on hidden layer.

  Uses diet variables.
  Recomputes hidden layer on backprop to save activation memory.

  Args:
    x: a Tensor with shape [batch, io_size]
    hidden_size: an integer
    params: a diet variable HParams object.

  Returns:
    a Tensor with shape [batch, io_size]
  """

  @fn_with_diet_vars(params)
  def diet_expert_internal(x):
    dim = x.get_shape().as_list()[-1]
    h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False)
    y = tf.layers.dense(h, dim, use_bias=False)
    y *= tf.rsqrt(tf.to_float(dim * hidden_size))
    return y

  return diet_expert_internal(x)
Example #15
Source File: train.py From finetune-transformer-lm with MIT License | 5 votes |
def _attn(q, k, v, train=False, scale=False):
    w = tf.matmul(q, k)

    if scale:
        n_state = shape_list(v)[-1]
        w = w * tf.rsqrt(tf.cast(n_state, tf.float32))

    w = mask_attn_weights(w)
    w = tf.nn.softmax(w)

    w = dropout(w, attn_pdrop, train)

    a = tf.matmul(w, v)
    return a
Example #16
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def l2_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None):
  """Layer normalization with l2 norm."""
  if filters is None:
    filters = shape_list(x)[-1]
  with tf.variable_scope(name, default_name="l2_norm", values=[x], reuse=reuse):
    scale = tf.get_variable(
        "l2_norm_scale", [filters], initializer=tf.ones_initializer())
    bias = tf.get_variable(
        "l2_norm_bias", [filters], initializer=tf.zeros_initializer())
    epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
    mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    l2norm = tf.reduce_sum(tf.square(x - mean), axis=[-1], keepdims=True)
    norm_x = (x - mean) * tf.rsqrt(l2norm + epsilon)
    return norm_x * scale + bias
Example #17
Source File: adafactor.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def _learning_rate_default(self, multiply_by_parameter_scale):
  learning_rate = tf.minimum(tf.rsqrt(step_num() + 1.0), 0.01)
  if not multiply_by_parameter_scale:
    learning_rate *= 0.05
  return learning_rate
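The default Adafactor learning rate above is min(1/sqrt(step + 1), 0.01), multiplied by a further 0.05 when parameter scaling is disabled. Evaluated in plain Python for a few step counts (illustrative only):

for step in (100, 10000, 1000000):
    print(step, min((step + 1.0) ** -0.5, 0.01))
# 100      0.01          (capped by the 0.01 ceiling)
# 10000    ~0.0099995
# 1000000  ~0.0010000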
Example #18
Source File: learning_rate.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def learning_rate_factor(name, step_num, hparams):
  """Compute the designated learning rate factor from hparams."""
  if name == "constant":
    tf.logging.info("Base learning rate: %f", hparams.learning_rate_constant)
    return hparams.learning_rate_constant
  elif name == "linear_warmup":
    return tf.minimum(1.0, step_num / hparams.learning_rate_warmup_steps)
  elif name == "linear_decay":
    ret = (hparams.train_steps - step_num) / hparams.learning_rate_decay_steps
    return tf.minimum(1.0, tf.maximum(0.0, ret))
  elif name == "cosdecay":  # openai gpt
    in_warmup = tf.cast(step_num <= hparams.learning_rate_warmup_steps,
                        dtype=tf.float32)
    ret = 0.5 * (1 + tf.cos(
        np.pi * step_num / hparams.learning_rate_decay_steps))
    # if in warmup stage return 1 else return the decayed value
    return in_warmup * 1 + (1 - in_warmup) * ret
  elif name == "rsqrt_decay":
    return tf.rsqrt(tf.maximum(step_num, hparams.learning_rate_warmup_steps))
  elif name == "rsqrt_normalized_decay":
    scale = tf.sqrt(tf.to_float(hparams.learning_rate_warmup_steps))
    return scale * tf.rsqrt(tf.maximum(
        step_num, hparams.learning_rate_warmup_steps))
  elif name == "exp_decay":
    decay_steps = hparams.learning_rate_decay_steps
    warmup_steps = hparams.learning_rate_warmup_steps
    p = (step_num - warmup_steps) / decay_steps
    p = tf.maximum(p, 0.)
    if hparams.learning_rate_decay_staircase:
      p = tf.floor(p)
    return tf.pow(hparams.learning_rate_decay_rate, p)
  elif name == "rsqrt_hidden_size":
    return hparams.hidden_size ** -0.5
  elif name == "legacy":
    return legacy_learning_rate_schedule(hparams)
  else:
    raise ValueError("unknown learning rate factor %s" % name)
Example #19
Source File: ops.py From PEPSI-Fast_image_inpainting_with_parallel_decoding_network with MIT License | 5 votes |
def instance_norm(input, name="instance_norm"):
    with tf.variable_scope(name):
        depth = input.get_shape()[3]
        scale = tf.get_variable(
            "scale", [depth],
            initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
        offset = tf.get_variable(
            "offset", [depth], initializer=tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(input, axes=[1, 2], keep_dims=True)
        epsilon = 1e-5
        inv = tf.rsqrt(variance + epsilon)
        normalized = (input - mean) * inv
        return scale * normalized + offset
Example #20
Source File: models.py From realmix with Apache License 2.0 | 5 votes |
def classifier(self, x, scales, filters, repeat, training, getter=None, **kwargs):
    del kwargs
    leaky_relu = functools.partial(tf.nn.leaky_relu, alpha=0.1)
    bn_args = dict(training=training, momentum=0.999)

    def conv_args(k, f):
        return dict(padding='same',
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=tf.rsqrt(0.5 * k * k * f)))

    def residual(x0, filters, stride=1, activate_before_residual=False):
        x = leaky_relu(tf.layers.batch_normalization(x0, **bn_args))
        if activate_before_residual:
            x0 = x

        x = tf.layers.conv2d(x, filters, 3, strides=stride, **conv_args(3, filters))
        x = leaky_relu(tf.layers.batch_normalization(x, **bn_args))
        x = tf.layers.conv2d(x, filters, 3, **conv_args(3, filters))

        if x0.get_shape()[3] != filters:
            x0 = tf.layers.conv2d(x0, filters, 1, strides=stride, **conv_args(1, filters))

        return x0 + x

    with tf.variable_scope('classify', reuse=tf.AUTO_REUSE, custom_getter=getter):
        y = tf.layers.conv2d((x - self.dataset.mean) / self.dataset.std, 16, 3,
                             **conv_args(3, 16))
        for scale in range(scales):
            y = residual(y, filters << scale, stride=2 if scale else 1,
                         activate_before_residual=scale == 0)
            for i in range(repeat - 1):
                y = residual(y, filters << scale)

        y = leaky_relu(tf.layers.batch_normalization(y, **bn_args))
        y = tf.reduce_mean(y, [1, 2])
        logits = tf.layers.dense(y, self.nclass,
                                 kernel_initializer=tf.glorot_normal_initializer())
    return logits
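Here tf.rsqrt sets the initializer's standard deviation: stddev = rsqrt(0.5 * k * k * f) is the same as sqrt(2 / (k * k * f)), the familiar sqrt(2/fan) scaling used with ReLU-style activations. Evaluated for a 3x3 convolution with 16 filters (illustrative numbers only):

k, f = 3, 16
stddev = (0.5 * k * k * f) ** -0.5
print(stddev)  # approx 0.1179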
Example #21
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def layer_norm_compute(x, epsilon, scale, bias):
  """Layer norm raw computation."""
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
  variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keepdims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return norm_x * scale + bias
Example #22
Source File: ops.py From tfdeploy with MIT License | 5 votes |
def test_Rsqrt(self):
    t = tf.rsqrt(self.random(4, 3))
    self.check(t)
Example #23
Source File: networks.py From bangla-tts with GNU General Public License v3.0 | 5 votes |
def Attention(Q, K, V, mononotic_attention=False, prev_max_attentions=None):
    '''
    Args:
      Q: Queries. (B, T/r, d)
      K: Keys. (B, N, d)
      V: Values. (B, N, d)
      mononotic_attention: A boolean. At training, it is False.
      prev_max_attentions: (B,). At training, it is set to None.

    Returns:
      R: [Context Vectors; Q]. (B, T/r, 2d)
      alignments: (B, N, T/r)
      max_attentions: (B, T/r)
    '''
    A = tf.matmul(Q, K, transpose_b=True) * tf.rsqrt(tf.to_float(d))
    if mononotic_attention:  # for inference
        key_masks = tf.sequence_mask(prev_max_attentions, max_N)
        reverse_masks = tf.sequence_mask(
            max_N - attention_win_size - prev_max_attentions, max_N)[:, ::-1]
        masks = tf.logical_or(key_masks, reverse_masks)
        masks = tf.tile(tf.expand_dims(masks, 1), [1, max_T, 1])
        paddings = tf.ones_like(A) * (-2 ** 32 + 1)  # (B, T/r, N)
        A = tf.where(tf.equal(masks, False), A, paddings)
    A = tf.nn.softmax(A)  # (B, T/r, N)
    max_attentions = tf.argmax(A, -1)  # (B, T/r)
    R = tf.matmul(A, V)
    R = tf.concat((R, Q), -1)
    alignments = tf.transpose(A, [0, 2, 1])  # (B, N, T/r)

    return R, alignments, max_attentions
Example #24
Source File: layers.py From Question_Answering_Models with MIT License | 5 votes |
def layer_norm_compute_python(x, epsilon, scale, bias):
    """Layer norm raw computation."""
    mean = tf.reduce_mean(x, axis=[-1], keep_dims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keep_dims=True)
    norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
    return norm_x * scale + bias
Example #25
Source File: func.py From zero with BSD 3-Clause "New" or "Revised" License | 5 votes |
def rms_norm(x, eps=None, scope=None):
    """RMS-based Layer normalization layer"""
    if eps is None:
        eps = dtype.epsilon()
    with tf.variable_scope(scope or "rms_norm",
                           dtype=tf.as_dtype(dtype.floatx())):
        layer_size = util.shape_list(x)[-1]

        scale = tf.get_variable("scale", [layer_size],
                                initializer=tf.ones_initializer())

        ms = tf.reduce_mean(x ** 2, -1, keep_dims=True)

        return scale * x * tf.rsqrt(ms + eps)
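Unlike the layer norm in the next example, rms_norm does not subtract the mean: each vector is divided by its root mean square and only a learned scale is applied. A hand-checkable NumPy sketch of that formula (illustrative numbers):

import numpy as np

x = np.array([3.0, 4.0])
ms = (x ** 2).mean()              # (9 + 16) / 2 = 12.5
print(x / np.sqrt(ms + 1e-8))     # approx [0.8485, 1.1314]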
Example #26
Source File: func.py From zero with BSD 3-Clause "New" or "Revised" License | 5 votes |
def layer_norm(x, eps=None, scope=None):
    """Layer normalization layer"""
    if eps is None:
        eps = dtype.epsilon()
    with tf.variable_scope(scope or "layer_norm",
                           dtype=tf.as_dtype(dtype.floatx())):
        layer_size = util.shape_list(x)[-1]

        scale = tf.get_variable("scale", [layer_size],
                                initializer=tf.ones_initializer())
        offset = tf.get_variable("offset", [layer_size],
                                 initializer=tf.zeros_initializer())

        mean = tf.reduce_mean(x, -1, keep_dims=True)
        var = tf.reduce_mean((x - mean) ** 2, -1, keep_dims=True)

        return scale * (x - mean) * tf.rsqrt(var + eps) + offset
Example #27
Source File: gpt_utils.py From BERT with Apache License 2.0 | 5 votes |
def norm(x, scope, axis=-1, epsilon=1e-5):
    """Normalize to mean = 0, std = 1, then do a diagonal affine transform."""
    with tf.variable_scope(scope):
        n_state = x.shape[-1].value
        g = tf.get_variable('g', [n_state], initializer=tf.constant_initializer(1))
        b = tf.get_variable('b', [n_state], initializer=tf.constant_initializer(0))
        u = tf.reduce_mean(x, axis=axis, keepdims=True)
        s = tf.reduce_mean(tf.square(x - u), axis=axis, keepdims=True)
        x = (x - u) * tf.rsqrt(s + epsilon)
        x = x * g + b
        return x
Example #28
Source File: speech_recognition.py From BERT with Apache License 2.0 | 5 votes |
def preprocess_example(self, example, mode, hparams):
  p = hparams
  if p.audio_preproc_in_bottom:
    example["inputs"] = tf.expand_dims(
        tf.expand_dims(example["waveforms"], -1), -1)
  else:
    waveforms = tf.expand_dims(example["waveforms"], 0)
    mel_fbanks = common_audio.compute_mel_filterbank_features(
        waveforms,
        sample_rate=p.audio_sample_rate,
        dither=p.audio_dither,
        preemphasis=p.audio_preemphasis,
        frame_length=p.audio_frame_length,
        frame_step=p.audio_frame_step,
        lower_edge_hertz=p.audio_lower_edge_hertz,
        upper_edge_hertz=p.audio_upper_edge_hertz,
        num_mel_bins=p.audio_num_mel_bins,
        apply_mask=False)
    if p.audio_add_delta_deltas:
      mel_fbanks = common_audio.add_delta_deltas(mel_fbanks)
    fbank_size = common_layers.shape_list(mel_fbanks)
    assert fbank_size[0] == 1
    # This replaces CMVN estimation on data
    var_epsilon = 1e-09
    mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
    variance = tf.reduce_mean(tf.squared_difference(mel_fbanks, mean),
                              keepdims=True, axis=1)
    mel_fbanks = (mel_fbanks - mean) * tf.rsqrt(variance + var_epsilon)
    # Later models like to flatten the two spatial dims. Instead, we add a
    # unit spatial dim and flatten the frequencies and channels.
    example["inputs"] = tf.concat([
        tf.reshape(mel_fbanks, [fbank_size[1], fbank_size[2], fbank_size[3]]),
        tf.zeros((p.num_zeropad_frames, fbank_size[2], fbank_size[3]))], 0)
  if not p.audio_keep_example_waveforms:
    del example["waveforms"]
  return super(SpeechRecognitionProblem, self
               ).preprocess_example(example, mode, hparams)
Example #29
Source File: latent_layers.py From BERT with Apache License 2.0 | 5 votes |
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
  """Latent prediction and loss.

  Args:
    latents_pred: Tensor of shape [..., depth].
    latents_discrete_hot: Tensor of shape [..., vocab_size].
    vocab_size: an int representing the vocab size.
    hparams: HParams.

  Returns:
    sample: Tensor of shape [...], a sample from a multinomial distribution.
    loss: Tensor of shape [...], the softmax cross-entropy.
  """
  with tf.variable_scope("latent_logits"):
    latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                     name="logits_dense")
    if hparams.logit_normalization:
      latents_logits *= tf.rsqrt(
          1e-8 + tf.reduce_mean(tf.square(latents_logits)))
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=latents_discrete_hot, logits=latents_logits)

    # TODO(trandustin): tease this out from ae_latent_softmax.
    # we use just the loss portion to anchor prior / encoder on text.
    sample = multinomial_sample(latents_logits, vocab_size,
                                hparams.sampling_method,
                                hparams.sampling_temp)
    return sample, loss
Example #30
Source File: common_layers.py From BERT with Apache License 2.0 | 5 votes |
def standardize_images(x):
  """Image standardization on batches and videos."""
  with tf.name_scope("standardize_images", values=[x]):
    x_shape = shape_list(x)
    x = to_float(tf.reshape(x, [-1] + x_shape[-3:]))
    x_mean = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    x_variance = tf.reduce_mean(
        tf.squared_difference(x, x_mean), axis=[1, 2], keepdims=True)
    num_pixels = to_float(x_shape[-2] * x_shape[-3])
    x = (x - x_mean) / tf.maximum(tf.sqrt(x_variance), tf.rsqrt(num_pixels))
    return tf.reshape(x, x_shape)