Python Examples of tensorflow.compat.v2.device

Source File: trainer.py From trax with Apache License 2.0

6 votes

def tf_init_tpu(worker='', protocol=None):
  """Initializes TPU for TensorFlow.

  Args:
    worker: The BNS address of the remote TPU worker. If it's empty (the default
      value), TF will assume the TPU devices are connected to the local host.
    protocol: The network protocol used to connect to the TPU worker.
  Returns:
    The device name of the TPU worker's CPU.
  """
  protocol = protocol or 'grpc'
  is_local = (worker in ('', 'local'))
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=worker)
  if not is_local:
    tf.config.experimental_connect_to_cluster(resolver, protocol=protocol)
  tf.tpu.experimental.initialize_tpu_system(resolver)
  if is_local:
    return ''
  else:
    return '/job:worker'

Source File: extensions.py From trax with Apache License 2.0

5 votes

def __init__(self, tensors):
    """Initializes the ShardedNdArray.

    Note that the tensors should be ordered in the way the pmap producing these
    tensors is run.

    Args:
      tensors: list or tuple of eager tensors, one for each device.
    """

    if not isinstance(tensors, (list, tuple)) or not tensors:
      raise ValueError(
          "Unable to create a ShardedNdArray without a list of tensors.")
    self.tensors = tensors
    self.n_devices = len(tensors)

Source File: extensions.py From trax with Apache License 2.0

5 votes

def psum(tensor, axis_name=None):
  """Sum all-reduction.

  Args:
    tensor: A tensor.
    axis_name: The axis name to reduce. Must equal to that of the surrounding
      pmap.

  Returns:
    The sum of the `tensor` replicas on each participating devices.
  """
  if axis_name != _pmap_config.axis_name():
    raise ValueError("axis_name (%s) is not equal to that of the surrounding "
                     "pmap (%s)" % (axis_name, _pmap_config.axis_name()))
  devices = _pmap_config.devices()
  if devices is None:
    raise ValueError("Can't retrieve the device list from the surrounding pmap")
  if tpu_devices(devices):
    # TODO(wangpeng): Supply the `group_assignment` argument to
    # tpu.cross_replica_sum, calculated from `devices`.
    return tf.compat.v1.tpu.cross_replica_sum(tensor)
  else:
    return tf.raw_ops.CollectiveReduce(
        input=tensor.data,
        group_size=len(devices),
        group_key=_GROUP_KEY,
        instance_key=_get_instance_key(),
        merge_op="Add",
        final_op="Id",
        subdiv_offsets=(0,))


# Note this is not available in the jax api, but seemed like a reasonable API
# to have.

Source File: extensions.py From trax with Apache License 2.0

5 votes

def pmean(tensor, axis_name=None):
  """Mean all-reduction.

  Args:
    tensor: A tensor.
    axis_name: The axis name to reduce. Must equal to that of the surrounding
      pmap.

  Returns:
    The mean of the `tensor` replicas on each participating devices.
  """
  if axis_name != _pmap_config.axis_name():
    raise ValueError("axis_name (%s) is not equal to that of the surrounding "
                     "pmap (%s)" % (axis_name, _pmap_config.axis_name()))
  devices = _pmap_config.devices()
  if devices is None:
    raise ValueError("Can't retrieve the device list from the surrounding pmap")
  if tpu_devices(devices):
    # TODO(wangpeng): Implement this.
    raise ValueError("pmean for TPU is not supported yet.")
  else:
    return tf.raw_ops.CollectiveReduce(
        input=tensor.data,
        group_size=len(devices),
        group_key=_GROUP_KEY,
        instance_key=_get_instance_key(),
        merge_op="Add",
        final_op="Div",
        subdiv_offsets=(0,))

Source File: extensions.py From trax with Apache License 2.0

5 votes

def tpu_devices(devices=None):
  """Gets TPU devices out of `devices`.

  Args:
    devices: A device list (as a list of strings). If None, the list of all
      available devices will be used for it.

  Returns:
    Those in `devices` that are TPUs.
  """
  return find_devices("TPU", devices)

Source File: trainer.py From trax with Apache License 2.0

5 votes

def _train_using_tf(output_dir):
  worker_cpu = tf_init_tpu()
  with tf.device(worker_cpu):
    if trainer_lib.num_devices() == 1:
      # TF's device priority is GPU > CPU > TPU, so we need to explicitly make
      # the TPU core the default device here.
      with tf.device('/device:TPU:0'):
        trainer_lib.train(output_dir=output_dir)
    else:
      trainer_lib.train(output_dir=output_dir)

Source File: exporter_lib_v2.py From models with Apache License 2.0

5 votes

def __call__(self, input_tensor):
    with tf.device('cpu:0'):
      image = tf.map_fn(
          _decode_image,
          elems=input_tensor,
          dtype=tf.uint8,
          parallel_iterations=32,
          back_prop=False)
    return self._run_inference_on_images(image)

Source File: exporter_lib_v2.py From models with Apache License 2.0

5 votes

def __call__(self, input_tensor):
    with tf.device('cpu:0'):
      image = tf.map_fn(
          _decode_tf_example,
          elems=input_tensor,
          dtype=tf.uint8,
          parallel_iterations=32,
          back_prop=False)
    return self._run_inference_on_images(image)

Source File: extensions.py From trax with Apache License 2.0

4 votes

def _get_pmap_impl(f, devices, has_tpu):
  """This is a helper function to return the pmap impl.

  Args:
    f: a function that takes ndarrays and returns ndarrays.
    devices: a list of strings; the device list.
    has_tpu: boolean; whether `devices` contains TPU devices.

  Returns:
    A function that takes tensors and returns tensors.
  """
  if has_tpu:
    # Workaround b/121383831
    f = _record_result_type(f)

  def tf_f(*tf_args):
    """A wrapper for `f` that takes/returns tensors."""
    np_args = _tf_to_np(tf_args)
    np_out = f(*np_args)
    return _np_to_tf(np_out)

  if has_tpu:

    @tf.function(autograph=False)
    def fn(inputs):
      # TODO(wangpeng): Supply the `device_assignment` argument to
      # tpu.replicate, calculated from `devices`.
      return tf.compat.v1.tpu.replicate(tf_f, inputs)

    return fn
  else:
    # This is run in a tf.function so that the various underlying functions can
    # be run in parallel.
    # The trace happens on the client, so any devices should not depend on any
    # side effects.

    jit_tf_f = tf.function(tf_f, autograph=False)

    @tf.function(autograph=False)
    def fn(all_per_device_args):
      """Multi-device function with calls placed on the correct device."""

      results = []
      for per_device_args, device in zip(all_per_device_args, devices):
        with tf.device(device):
          results.append(jit_tf_f(*per_device_args))
      return results

    return fn

Source File: continuous_batched.py From compression with Apache License 2.0

4 votes

def compress(self, bottleneck):
    """Compresses a floating-point tensor.

    Compresses the tensor to bit strings. `bottleneck` is first quantized
    as in `quantize()`, and then compressed using the probability tables derived
    from `self.prior`. The quantized tensor can later be recovered by
    calling `decompress()`.

    The innermost `self.coding_rank` dimensions are treated as one coding unit,
    i.e. are compressed into one string each. Any additional dimensions to the
    left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed. Must have at
        least `self.coding_rank` dimensions, and the innermost dimensions must
        be broadcastable to `self.prior_shape`.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    input_shape = tf.shape(bottleneck)
    input_rank = tf.shape(input_shape)[0]
    batch_shape, coding_shape = tf.split(
        input_shape, [input_rank - self.coding_rank, self.coding_rank])
    broadcast_shape = coding_shape[
        :self.coding_rank - len(self.prior_shape)]

    indexes = self._compute_indexes(broadcast_shape)
    if self._quantization_offset is not None:
      bottleneck -= self._quantization_offset
    symbols = tf.cast(tf.round(bottleneck), tf.int32)
    symbols = tf.reshape(symbols, tf.concat([[-1], coding_shape], 0))

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(symbols):
        return range_coding_ops.unbounded_index_range_encode(
            symbols, indexes, cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      strings = tf.map_fn(
          loop_body, symbols, dtype=tf.string, name="compress")

    strings = tf.reshape(strings, batch_shape)
    return strings

Source File: continuous_batched.py From compression with Apache License 2.0

4 votes

def decompress(self, strings, broadcast_shape):
    """Decompresses a tensor.

    Reconstructs the quantized tensor from bit strings produced by `compress()`.
    It is necessary to provide a part of the output shape in `broadcast_shape`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      broadcast_shape: Iterable of ints. The part of the output tensor shape
        between the shape of `strings` on the left and
        `self.prior_shape` on the right. This must match the shape
        of the input to `compress()`.

    Returns:
      A `tf.Tensor` of shape `strings.shape + broadcast_shape +
      self.prior_shape`.
    """
    strings = tf.convert_to_tensor(strings, dtype=tf.string)
    broadcast_shape = tf.convert_to_tensor(broadcast_shape, dtype=tf.int32)
    batch_shape = tf.shape(strings)
    symbols_shape = tf.concat(
        [batch_shape, broadcast_shape, self.prior_shape], 0)

    indexes = self._compute_indexes(broadcast_shape)
    strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(string):
        return range_coding_ops.unbounded_index_range_decode(
            string, indexes, cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      symbols = tf.map_fn(
          loop_body, strings, dtype=tf.int32, name="decompress")

    symbols = tf.reshape(symbols, symbols_shape)
    outputs = tf.cast(symbols, self.dtype)
    if self._quantization_offset is not None:
      outputs += self._quantization_offset
    return outputs

Source File: continuous_indexed.py From compression with Apache License 2.0

4 votes

def compress(self, bottleneck, indexes):
    """Compresses a floating-point tensor.

    Compresses the tensor to bit strings. `bottleneck` is first quantized
    as in `quantize()`, and then compressed using the probability tables derived
    from `indexes`. The quantized tensor can later be recovered by calling
    `decompress()`.

    The innermost `self.coding_rank` dimensions are treated as one coding unit,
    i.e. are compressed into one string each. Any additional dimensions to the
    left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)

    symbols_shape = tf.shape(flat_indexes)
    batch_shape = symbols_shape[:-self.coding_rank]
    flat_shape = tf.concat([[-1], symbols_shape[-self.coding_rank:]], 0)

    flat_indexes = tf.reshape(flat_indexes, flat_shape)

    offset = helpers.quantization_offset(self._make_prior(indexes))
    symbols = tf.cast(tf.round(bottleneck - offset), tf.int32)
    symbols = tf.reshape(symbols, flat_shape)

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(args):
        return range_coding_ops.unbounded_index_range_encode(
            args[0], args[1], cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      strings = tf.map_fn(
          loop_body, (symbols, flat_indexes), dtype=tf.string, name="compress")

    strings = tf.reshape(strings, batch_shape)
    return strings

Source File: continuous_indexed.py From compression with Apache License 2.0

4 votes

def decompress(self, strings, indexes):
    """Decompresses a tensor.

    Reconstructs the quantized tensor from bit strings produced by `compress()`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      indexes: `tf.Tensor` specifying the scalar distribution for each output
        element. See class docstring for examples.

    Returns:
      A `tf.Tensor` of the same shape as `indexes` (without the optional channel
      dimension).
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)

    symbols_shape = tf.shape(flat_indexes)
    flat_shape = tf.concat([[-1], symbols_shape[-self.coding_rank:]], 0)

    flat_indexes = tf.reshape(flat_indexes, flat_shape)

    strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(args):
        return range_coding_ops.unbounded_index_range_decode(
            args[0], args[1], cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      symbols = tf.map_fn(
          loop_body, (strings, flat_indexes), dtype=tf.int32, name="decompress")

    symbols = tf.reshape(symbols, symbols_shape)
    offset = helpers.quantization_offset(self._make_prior(indexes))
    return tf.cast(symbols, self.dtype) + offset

Python tensorflow.compat.v2.device() Examples