Python tensorflow.compat.v2.device() Examples

The following are 13 code examples of tensorflow.compat.v2.device(), drawn from open-source projects. The source file and originating project are noted above each example. You may also want to check out the other available functions and classes of the tensorflow.compat.v2 module.
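
Before the project-specific examples, here is a minimal, self-contained sketch of the basic usage pattern: an enclosing `tf.device()` scope pins the ops created inside it to the named device. The device string below is illustrative; substitute one reported by `tf.config.list_logical_devices()`.

import tensorflow.compat.v2 as tf

# List the devices TensorFlow can see, then pin a computation to one of them.
print([d.name for d in tf.config.list_logical_devices()])

with tf.device('/device:CPU:0'):
  a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
  b = tf.matmul(a, a)  # placed on CPU:0 because of the enclosing scope

print(b.device)  # e.g. '/job:localhost/replica:0/task:0/device:CPU:0'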
Example #1
Source File: trainer.py    From trax with Apache License 2.0
def tf_init_tpu(worker='', protocol=None):
  """Initializes TPU for TensorFlow.

  Args:
    worker: The BNS address of the remote TPU worker. If it's empty (the default
      value), TF will assume the TPU devices are connected to the local host.
    protocol: The network protocol used to connect to the TPU worker.
  Returns:
    The device name of the TPU worker's CPU.
  """
  protocol = protocol or 'grpc'
  is_local = (worker in ('', 'local'))
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=worker)
  if not is_local:
    tf.config.experimental_connect_to_cluster(resolver, protocol=protocol)
  tf.tpu.experimental.initialize_tpu_system(resolver)
  if is_local:
    return ''
  else:
    return '/job:worker' 
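
A hedged usage sketch for `tf_init_tpu`; running it requires an actual TPU runtime, and the remote worker address below is a hypothetical placeholder.

# Local TPU attached to this host (the default): returns ''.
worker_cpu = tf_init_tpu()

# Remote TPU worker (hypothetical address): returns '/job:worker'.
# worker_cpu = tf_init_tpu(worker='my-tpu-worker:8470')

with tf.device(worker_cpu):
  # Host-side ops (e.g. the input pipeline) run on the TPU worker's CPU.
  dataset = tf.data.Dataset.range(8).batch(2)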
Example #2
Source File: extensions.py    From trax with Apache License 2.0
def __init__(self, tensors):
    """Initializes the ShardedNdArray.

    Note that the tensors should be ordered to match the devices of the pmap
    call that produced them.

    Args:
      tensors: list or tuple of eager tensors, one for each device.
    """

    if not isinstance(tensors, (list, tuple)) or not tensors:
      raise ValueError(
          "Unable to create a ShardedNdArray without a list of tensors.")
    self.tensors = tensors
    self.n_devices = len(tensors) 
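
A minimal construction sketch for `ShardedNdArray`; the two tensors below are hypothetical stand-ins for per-device `pmap` outputs.

# One eager tensor per device, in device order.
shards = [tf.constant([1.0, 2.0]), tf.constant([3.0, 4.0])]
sharded = ShardedNdArray(shards)
assert sharded.n_devices == 2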
Example #3
Source File: extensions.py    From trax with Apache License 2.0
def psum(tensor, axis_name=None):
  """Sum all-reduction.

  Args:
    tensor: A tensor.
    axis_name: The axis name to reduce over. Must be equal to that of the
      surrounding pmap.

  Returns:
    The sum of the `tensor` replicas on each participating device.
  """
  if axis_name != _pmap_config.axis_name():
    raise ValueError("axis_name (%s) is not equal to that of the surrounding "
                     "pmap (%s)" % (axis_name, _pmap_config.axis_name()))
  devices = _pmap_config.devices()
  if devices is None:
    raise ValueError("Can't retrieve the device list from the surrounding pmap")
  if tpu_devices(devices):
    # TODO(wangpeng): Supply the `group_assignment` argument to
    # tpu.cross_replica_sum, calculated from `devices`.
    return tf.compat.v1.tpu.cross_replica_sum(tensor)
  else:
    return tf.raw_ops.CollectiveReduce(
        input=tensor.data,
        group_size=len(devices),
        group_key=_GROUP_KEY,
        instance_key=_get_instance_key(),
        merge_op="Add",
        final_op="Id",
        subdiv_offsets=(0,))


# Note this is not available in the jax api, but seemed like a reasonable API
# to have. 
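
A hedged sketch of how `psum` is meant to be called: inside a function run under this module's `pmap`, with a matching `axis_name`. The `pmap(f, axis_name=...)` signature is assumed here (it mirrors `jax.pmap`).

def replica_fn(x):
  # Every replica contributes its local value; all replicas receive the sum.
  return psum(x, axis_name='i')

parallel_sum = pmap(replica_fn, axis_name='i')  # assumed API of this module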
Example #4
Source File: extensions.py    From trax with Apache License 2.0
def pmean(tensor, axis_name=None):
  """Mean all-reduction.

  Args:
    tensor: A tensor.
    axis_name: The axis name to reduce over. Must be equal to that of the
      surrounding pmap.

  Returns:
    The mean of the `tensor` replicas on each participating device.
  """
  if axis_name != _pmap_config.axis_name():
    raise ValueError("axis_name (%s) is not equal to that of the surrounding "
                     "pmap (%s)" % (axis_name, _pmap_config.axis_name()))
  devices = _pmap_config.devices()
  if devices is None:
    raise ValueError("Can't retrieve the device list from the surrounding pmap")
  if tpu_devices(devices):
    # TODO(wangpeng): Implement this.
    raise ValueError("pmean for TPU is not supported yet.")
  else:
    return tf.raw_ops.CollectiveReduce(
        input=tensor.data,
        group_size=len(devices),
        group_key=_GROUP_KEY,
        instance_key=_get_instance_key(),
        merge_op="Add",
        final_op="Div",
        subdiv_offsets=(0,)) 
Example #5
Source File: extensions.py    From trax with Apache License 2.0
def tpu_devices(devices=None):
  """Gets TPU devices out of `devices`.

  Args:
    devices: A device list (as a list of strings). If None, the list of all
      available devices is used.

  Returns:
    Those in `devices` that are TPUs.
  """
  return find_devices("TPU", devices) 
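
A hedged usage sketch for `tpu_devices`; `find_devices` is a helper defined elsewhere in the same module and is assumed to filter device names by device type.

# Explicit device list (strings are illustrative):
tpus = tpu_devices(['/device:TPU:0', '/device:CPU:0'])  # expected: ['/device:TPU:0']

# Default: inspect all available devices.
if tpu_devices():
  print('TPU cores detected')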
Example #6
Source File: trainer.py    From trax with Apache License 2.0
def _train_using_tf(output_dir):
  worker_cpu = tf_init_tpu()
  with tf.device(worker_cpu):
    if trainer_lib.num_devices() == 1:
      # TF's device priority is GPU > CPU > TPU, so we need to explicitly make
      # the TPU core the default device here.
      with tf.device('/device:TPU:0'):
        trainer_lib.train(output_dir=output_dir)
    else:
      trainer_lib.train(output_dir=output_dir) 
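
The nesting above shows that an inner `tf.device()` scope overrides the outer one for the ops created inside it. A minimal standalone sketch of the same pattern; it is illustrative only, since running it requires the named devices to exist (or soft device placement to be enabled).

with tf.device('/device:CPU:0'):       # outer scope: ops default to the host CPU
  with tf.device('/device:TPU:0'):     # inner scope wins for ops created here
    x = tf.constant(1) + tf.constant(2)
  y = tf.constant(3) + tf.constant(4)  # placed back on CPU:0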
Example #7
Source File: exporter_lib_v2.py    From models with Apache License 2.0
def __call__(self, input_tensor):
    with tf.device('cpu:0'):
      image = tf.map_fn(
          _decode_image,
          elems=input_tensor,
          dtype=tf.uint8,
          parallel_iterations=32,
          back_prop=False)
    return self._run_inference_on_images(image) 
Example #8
Source File: exporter_lib_v2.py    From models with Apache License 2.0
def __call__(self, input_tensor):
    with tf.device('cpu:0'):
      image = tf.map_fn(
          _decode_tf_example,
          elems=input_tensor,
          dtype=tf.uint8,
          parallel_iterations=32,
          back_prop=False)
    return self._run_inference_on_images(image) 
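
Examples #7 and #8 use the same pattern: decoding is pinned to the host CPU with `tf.device('cpu:0')`, so only the decoded image batch is shipped to the accelerator. A minimal sketch of that pattern; the decoder and the assumption of fixed-size images are illustrative.

def _decode(serialized_png):
  # Hypothetical decoder; assumes all images share one shape so map_fn agrees.
  return tf.io.decode_png(serialized_png, channels=3)

def preprocess(encoded_images):
  with tf.device('/device:CPU:0'):
    return tf.map_fn(_decode, encoded_images, dtype=tf.uint8)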
Example #9
Source File: extensions.py    From trax with Apache License 2.0
def _get_pmap_impl(f, devices, has_tpu):
  """This is a helper function to return the pmap impl.

  Args:
    f: a function that takes ndarrays and returns ndarrays.
    devices: a list of strings; the device list.
    has_tpu: boolean; whether `devices` contains TPU devices.

  Returns:
    A function that takes tensors and returns tensors.
  """
  if has_tpu:
    # Workaround b/121383831
    f = _record_result_type(f)

  def tf_f(*tf_args):
    """A wrapper for `f` that takes/returns tensors."""
    np_args = _tf_to_np(tf_args)
    np_out = f(*np_args)
    return _np_to_tf(np_out)

  if has_tpu:

    @tf.function(autograph=False)
    def fn(inputs):
      # TODO(wangpeng): Supply the `device_assignment` argument to
      # tpu.replicate, calculated from `devices`.
      return tf.compat.v1.tpu.replicate(tf_f, inputs)

    return fn
  else:
    # This is run in a tf.function so that the various underlying functions can
    # be run in parallel.
    # The trace happens on the client, so device placement should not depend on
    # any side effects.

    jit_tf_f = tf.function(tf_f, autograph=False)

    @tf.function(autograph=False)
    def fn(all_per_device_args):
      """Multi-device function with calls placed on the correct device."""

      results = []
      for per_device_args, device in zip(all_per_device_args, devices):
        with tf.device(device):
          results.append(jit_tf_f(*per_device_args))
      return results

    return fn 
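
A hedged sketch of how the non-TPU branch might be driven; the nesting of the per-device arguments (one tuple per device) is inferred from the `zip` loop above, and calling this private helper directly is for illustration only.

devices = [d.name for d in tf.config.list_logical_devices('CPU')]

def add_one(x):
  return x + 1

pmap_fn = _get_pmap_impl(add_one, devices, has_tpu=False)

# One argument tuple per device, matching the zip() in `fn` above.
per_device_args = [(tf.constant(float(i)),) for i in range(len(devices))]
results = pmap_fn(per_device_args)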
Example #10
Source File: continuous_batched.py    From compression with Apache License 2.0
def compress(self, bottleneck):
    """Compresses a floating-point tensor.

    Compresses the tensor to bit strings. `bottleneck` is first quantized
    as in `quantize()`, and then compressed using the probability tables derived
    from `self.prior`. The quantized tensor can later be recovered by
    calling `decompress()`.

    The innermost `self.coding_rank` dimensions are treated as one coding unit,
    i.e. are compressed into one string each. Any additional dimensions to the
    left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed. Must have at
        least `self.coding_rank` dimensions, and the innermost dimensions must
        be broadcastable to `self.prior_shape`.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    input_shape = tf.shape(bottleneck)
    input_rank = tf.shape(input_shape)[0]
    batch_shape, coding_shape = tf.split(
        input_shape, [input_rank - self.coding_rank, self.coding_rank])
    broadcast_shape = coding_shape[
        :self.coding_rank - len(self.prior_shape)]

    indexes = self._compute_indexes(broadcast_shape)
    if self._quantization_offset is not None:
      bottleneck -= self._quantization_offset
    symbols = tf.cast(tf.round(bottleneck), tf.int32)
    symbols = tf.reshape(symbols, tf.concat([[-1], coding_shape], 0))

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(symbols):
        return range_coding_ops.unbounded_index_range_encode(
            symbols, indexes, cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      strings = tf.map_fn(
          loop_body, symbols, dtype=tf.string, name="compress")

    strings = tf.reshape(strings, batch_shape)
    return strings 
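
A hedged round-trip sketch for this entropy model. The constructor and method names follow the public `tensorflow_compression` API, but the prior, shapes, and `coding_rank` are illustrative.

import tensorflow_compression as tfc

prior = tfc.NoisyNormal(loc=0.0, scale=1.0)   # scalar prior, prior_shape = []
em = tfc.ContinuousBatchedEntropyModel(prior, coding_rank=1, compression=True)

x = tf.random.normal([4, 16])          # 4 coding units of 16 elements each
strings = em.compress(x)               # shape [4]: one string per coding unit
x_hat = em.decompress(strings, [16])   # broadcast_shape = [16] -> shape [4, 16]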
Example #11
Source File: continuous_batched.py    From compression with Apache License 2.0
def decompress(self, strings, broadcast_shape):
    """Decompresses a tensor.

    Reconstructs the quantized tensor from bit strings produced by `compress()`.
    It is necessary to provide a part of the output shape in `broadcast_shape`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      broadcast_shape: Iterable of ints. The part of the output tensor shape
        between the shape of `strings` on the left and
        `self.prior_shape` on the right. This must match the shape
        of the input to `compress()`.

    Returns:
      A `tf.Tensor` of shape `strings.shape + broadcast_shape +
      self.prior_shape`.
    """
    strings = tf.convert_to_tensor(strings, dtype=tf.string)
    broadcast_shape = tf.convert_to_tensor(broadcast_shape, dtype=tf.int32)
    batch_shape = tf.shape(strings)
    symbols_shape = tf.concat(
        [batch_shape, broadcast_shape, self.prior_shape], 0)

    indexes = self._compute_indexes(broadcast_shape)
    strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(string):
        return range_coding_ops.unbounded_index_range_decode(
            string, indexes, cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      symbols = tf.map_fn(
          loop_body, strings, dtype=tf.int32, name="decompress")

    symbols = tf.reshape(symbols, symbols_shape)
    outputs = tf.cast(symbols, self.dtype)
    if self._quantization_offset is not None:
      outputs += self._quantization_offset
    return outputs 
Example #12
Source File: continuous_indexed.py    From compression with Apache License 2.0
def compress(self, bottleneck, indexes):
    """Compresses a floating-point tensor.

    Compresses the tensor to bit strings. `bottleneck` is first quantized
    as in `quantize()`, and then compressed using the probability tables derived
    from `indexes`. The quantized tensor can later be recovered by calling
    `decompress()`.

    The innermost `self.coding_rank` dimensions are treated as one coding unit,
    i.e. are compressed into one string each. Any additional dimensions to the
    left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)

    symbols_shape = tf.shape(flat_indexes)
    batch_shape = symbols_shape[:-self.coding_rank]
    flat_shape = tf.concat([[-1], symbols_shape[-self.coding_rank:]], 0)

    flat_indexes = tf.reshape(flat_indexes, flat_shape)

    offset = helpers.quantization_offset(self._make_prior(indexes))
    symbols = tf.cast(tf.round(bottleneck - offset), tf.int32)
    symbols = tf.reshape(symbols, flat_shape)

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(args):
        return range_coding_ops.unbounded_index_range_encode(
            args[0], args[1], cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      strings = tf.map_fn(
          loop_body, (symbols, flat_indexes), dtype=tf.string, name="compress")

    strings = tf.reshape(strings, batch_shape)
    return strings 
Example #13
Source File: continuous_indexed.py    From compression with Apache License 2.0
def decompress(self, strings, indexes):
    """Decompresses a tensor.

    Reconstructs the quantized tensor from bit strings produced by `compress()`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      indexes: `tf.Tensor` specifying the scalar distribution for each output
        element. See class docstring for examples.

    Returns:
      A `tf.Tensor` of the same shape as `indexes` (without the optional channel
      dimension).
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)

    symbols_shape = tf.shape(flat_indexes)
    flat_shape = tf.concat([[-1], symbols_shape[-self.coding_rank:]], 0)

    flat_indexes = tf.reshape(flat_indexes, flat_shape)

    strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset
      def loop_body(args):
        return range_coding_ops.unbounded_index_range_decode(
            args[0], args[1], cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      symbols = tf.map_fn(
          loop_body, (strings, flat_indexes), dtype=tf.int32, name="decompress")

    symbols = tf.reshape(symbols, symbols_shape)
    offset = helpers.quantization_offset(self._make_prior(indexes))
    return tf.cast(symbols, self.dtype) + offset