Python tensorflow.to_bfloat16() Examples
The following are 7 code examples of tensorflow.to_bfloat16(), collected from open source projects. The original project and source file for each example are listed above its code. You may also want to check out the other available functions and classes of the tensorflow module.
Example #1
Source File: quantization.py From fine-lm with MIT License
def _to_bfloat16_unbiased(x, noise):
  """Convert a float32 to a bfloat16 using randomized roundoff.

  Args:
    x: A float32 Tensor.
    noise: a float32 Tensor with values in [0, 1), broadcastable to tf.shape(x)

  Returns:
    A bfloat16 Tensor.
  """
  x_sign = tf.sign(x)
  # Make sure x is positive.  If it is zero, the two candidates are identical.
  x = x * x_sign + 1e-30
  cand1 = tf.to_bfloat16(x)
  cand1_f = tf.to_float(cand1)
  # This relies on the fact that for a positive bfloat16 b, b * 1.005 gives
  # you the next higher bfloat16 and b * 0.995 gives you the next lower one.
  # Both 1.005 and 0.995 are ballpark estimates.
  cand2 = tf.to_bfloat16(
      tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995))
  ret = _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2)
  return ret * tf.to_bfloat16(x_sign)
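The helper _randomized_roundoff_to_bfloat16 used above is defined elsewhere in quantization.py and is not shown on this page. As a rough sketch of the idea it implements (unbiased stochastic rounding: pick the candidate on the far side of x with probability equal to x's fractional position between the two candidates, so the result equals x in expectation), it might look like the following. The body below is inferred from the call site, not copied from the project:

import tensorflow as tf  # TF 1.x; tf.to_float/tf.to_bfloat16 were removed in TF 2.x

def _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2):
  """Sketch (assumption): round x to cand1 or cand2 in an unbiased way.

  Assumes cand1 and cand2 are bfloat16 candidates bracketing x and that
  noise is uniform in [0, 1), as in the caller above.
  """
  cand1_f = tf.to_float(cand1)
  cand2_f = tf.to_float(cand2)
  # Fractional position of x between the two candidates, in [0, 1).
  fpart = (x - cand1_f) / (cand2_f - cand1_f)
  # Choose cand2 with probability fpart, so E[result] == x.
  return tf.where(tf.greater(fpart, noise), cand2, cand1)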
Example #2
Source File: quantization.py From BERT with Apache License 2.0
def _to_bfloat16_unbiased(x, noise):
  """Convert a float32 to a bfloat16 using randomized roundoff.

  Args:
    x: A float32 Tensor.
    noise: a float32 Tensor with values in [0, 1), broadcastable to tf.shape(x)

  Returns:
    A bfloat16 Tensor.
  """
  x_sign = tf.sign(x)
  # Make sure x is positive.  If it is zero, the two candidates are identical.
  x = x * x_sign + 1e-30
  cand1 = tf.to_bfloat16(x)
  cand1_f = tf.to_float(cand1)
  # This relies on the fact that for a positive bfloat16 b, b * 1.005 gives
  # you the next higher bfloat16 and b * 0.995 gives you the next lower one.
  # Both 1.005 and 0.995 are ballpark estimates.
  cand2 = tf.to_bfloat16(
      tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995))
  ret = _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2)
  return ret * tf.to_bfloat16(x_sign)
Example #3
Source File: quantization.py From training_results_v0.5 with Apache License 2.0
def _to_bfloat16_unbiased(x, noise):
  """Convert a float32 to a bfloat16 using randomized roundoff.

  Args:
    x: A float32 Tensor.
    noise: a float32 Tensor with values in [0, 1), broadcastable to tf.shape(x)

  Returns:
    A bfloat16 Tensor.
  """
  x_sign = tf.sign(x)
  # Make sure x is positive.  If it is zero, the two candidates are identical.
  x = x * x_sign + 1e-30
  cand1 = tf.to_bfloat16(x)
  cand1_f = tf.to_float(cand1)
  # This relies on the fact that for a positive bfloat16 b, b * 1.005 gives
  # you the next higher bfloat16 and b * 0.995 gives you the next lower one.
  # Both 1.005 and 0.995 are ballpark estimates.
  cand2 = tf.to_bfloat16(
      tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995))
  ret = _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2)
  return ret * tf.to_bfloat16(x_sign)
Example #4
Source File: runtime_support.py From g-tensorflow-models with Apache License 2.0
def _blocked_and_dtype_transformations(tensor):
  """Yields variants of a tensor, for standard blocking/dtype variants.

  Args:
    tensor (tf.Tensor): Input tensor.

  Yields:
    (modified_tensor, suffix) pairs, where `modified_tensor` is a transformed
    version of the input, and `suffix` is a string like "/blocked32".
  """
  for blocking_level in (32, 48):
    blocked = make_padded_blocked_matrix(tensor, blocking_level)
    bfloat16_blocked = tf.to_bfloat16(bfloat16_permutation(blocked))
    yield blocked, '/blocked{}'.format(blocking_level)
    yield bfloat16_blocked, '/blocked{}/bfloat16'.format(blocking_level)
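One possible way to consume this generator, shown only as an illustrative sketch: the variable name, shape, and export dictionary below are hypothetical and not taken from runtime_support.py, and make_padded_blocked_matrix is assumed to be in scope from that file.

import tensorflow as tf  # TF 1.x API (tf.get_variable, tf.to_bfloat16)

weights = tf.get_variable('weights', shape=[128, 96], dtype=tf.float32)
exported = {}
for variant, suffix in _blocked_and_dtype_transformations(weights):
  # Produces keys such as 'weights/blocked32' (float32 padded/blocked matrix)
  # and 'weights/blocked32/bfloat16' (bfloat16, AVX2-permuted matrix).
  exported['weights' + suffix] = variant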
Example #5
Source File: runtime_support.py From g-tensorflow-models with Apache License 2.0
def bfloat16_permutation(tensor):
  """Permutes values in the last dimension of a tensor.

  This permutation is used so that we can directly use unpacklo/unpackhi AVX2
  instructions on the matrix coefficients.  These unpacking instructions
  effectively permute the data.  See FastUnpackPermutation() and
  AvxFloatVecArray::Load(const TruncatedFloat16 *) in avx_vector_array.h for
  more details.

  Args:
    tensor: Blocked matrix, the result of make_padded_blocked_matrix().
        Must have its last dimension a multiple of 16.

  Returns:
    Permuted matrix, suitable for calling tf.to_bfloat16() on.  For testing
    convenience we don't do so in this method.

  Raises:
    ValueError: If the matrix's block dimension is not a multiple of 16.
  """
  orig_shape = tensor.shape
  if tensor.shape[-1] % 16 != 0:
    raise ValueError('Bad block dimension, must be divisible by 16')
  permutation = [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15]
  indices = tf.constant(
      [16 * (i // 16) + permutation[i % 16] for i in xrange(orig_shape[-1])])
  return tf.gather(tensor, indices, axis=len(orig_shape) - 1)
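The index pattern built by the list comprehension above can be checked in isolation with plain Python (no TensorFlow required). For a last dimension of width 32, every group of 16 indices keeps its first and last quarters in place and swaps the two middle quarters:

permutation = [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15]
indices = [16 * (i // 16) + permutation[i % 16] for i in range(32)]
print(indices)
# [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15,
#  16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31]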
Example #6
Source File: runtime_support.py From multilabel-image-classification-tensorflow with MIT License
def _blocked_and_dtype_transformations(tensor):
  """Yields variants of a tensor, for standard blocking/dtype variants.

  Args:
    tensor (tf.Tensor): Input tensor.

  Yields:
    (modified_tensor, suffix) pairs, where `modified_tensor` is a transformed
    version of the input, and `suffix` is a string like "/blocked32".
  """
  for blocking_level in (32, 48):
    blocked = make_padded_blocked_matrix(tensor, blocking_level)
    bfloat16_blocked = tf.to_bfloat16(bfloat16_permutation(blocked))
    yield blocked, '/blocked{}'.format(blocking_level)
    yield bfloat16_blocked, '/blocked{}/bfloat16'.format(blocking_level)
Example #7
Source File: runtime_support.py From multilabel-image-classification-tensorflow with MIT License
def bfloat16_permutation(tensor):
  """Permutes values in the last dimension of a tensor.

  This permutation is used so that we can directly use unpacklo/unpackhi AVX2
  instructions on the matrix coefficients.  These unpacking instructions
  effectively permute the data.  See FastUnpackPermutation() and
  AvxFloatVecArray::Load(const TruncatedFloat16 *) in avx_vector_array.h for
  more details.

  Args:
    tensor: Blocked matrix, the result of make_padded_blocked_matrix().
        Must have its last dimension a multiple of 16.

  Returns:
    Permuted matrix, suitable for calling tf.to_bfloat16() on.  For testing
    convenience we don't do so in this method.

  Raises:
    ValueError: If the matrix's block dimension is not a multiple of 16.
  """
  orig_shape = tensor.shape
  if tensor.shape[-1] % 16 != 0:
    raise ValueError('Bad block dimension, must be divisible by 16')
  permutation = [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15]
  indices = tf.constant(
      [16 * (i // 16) + permutation[i % 16] for i in xrange(orig_shape[-1])])
  return tf.gather(tensor, indices, axis=len(orig_shape) - 1)