Python tensorflow.compat.v2.device() Examples
The following are 13 code examples of tensorflow.compat.v2.device().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow.compat.v2, or try the search function.
Example #1
Source File: trainer.py From trax with Apache License 2.0 | 6 votes |
def tf_init_tpu(worker='', protocol=None):
    """Set up the TPU system so that TensorFlow can use it.

    Args:
      worker: BNS address of the remote TPU worker. An empty string (the
        default) or 'local' means the TPU devices are attached to the local
        host.
      protocol: Network protocol used to reach the TPU worker. Falls back to
        'grpc' when no (truthy) value is given.

    Returns:
      The device name of the TPU worker's CPU: '' for a local TPU and
      '/job:worker' for a remote one.
    """
    if not protocol:
        protocol = 'grpc'
    runs_locally = worker in ('', 'local')
    cluster = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=worker)
    # Only a remote worker needs an explicit connection to the cluster.
    if not runs_locally:
        tf.config.experimental_connect_to_cluster(cluster, protocol=protocol)
    tf.tpu.experimental.initialize_tpu_system(cluster)
    return '' if runs_locally else '/job:worker'
Example #2
Source File: extensions.py From trax with Apache License 2.0 | 5 votes |
def __init__(self, tensors):
    """Build a ShardedNdArray from per-device tensors.

    The tensors are expected in the same order in which the pmap that
    produced them was run.

    Args:
      tensors: Non-empty list or tuple of eager tensors, one for each device.

    Raises:
      ValueError: If `tensors` is not a list/tuple, or is empty.
    """
    is_sequence = isinstance(tensors, (list, tuple))
    # `tensors` is only truth-tested once it is known to be a list/tuple.
    if not (is_sequence and tensors):
        raise ValueError(
            "Unable to create a ShardedNdArray without a list of tensors.")
    self.tensors = tensors
    self.n_devices = len(tensors)
Example #3
Source File: extensions.py From trax with Apache License 2.0 | 5 votes |
def psum(tensor, axis_name=None):
    """All-reduce `tensor` across replicas by summation.

    Args:
      tensor: A tensor.
      axis_name: Name of the axis to reduce over. It has to equal the axis
        name of the surrounding pmap.

    Returns:
      The sum of the `tensor` replicas on each participating device.

    Raises:
      ValueError: If `axis_name` differs from the surrounding pmap's axis
        name, or if the surrounding pmap yields no device list.
    """
    if axis_name != _pmap_config.axis_name():
        message = ("axis_name (%s) is not equal to that of the surrounding "
                   "pmap (%s)")
        raise ValueError(message % (axis_name, _pmap_config.axis_name()))

    replica_devices = _pmap_config.devices()
    if replica_devices is None:
        raise ValueError(
            "Can't retrieve the device list from the surrounding pmap")

    if tpu_devices(replica_devices):
        # TODO(wangpeng): Supply the `group_assignment` argument to
        # tpu.cross_replica_sum, calculated from `devices`.
        return tf.compat.v1.tpu.cross_replica_sum(tensor)

    # No TPU among the devices: reduce with a collective "Add" whose final
    # op is the identity, i.e. a plain sum.
    return tf.raw_ops.CollectiveReduce(
        input=tensor.data,
        group_size=len(replica_devices),
        group_key=_GROUP_KEY,
        instance_key=_get_instance_key(),
        merge_op="Add",
        final_op="Id",
        subdiv_offsets=(0,))


# Note this is not available in the jax api, but seemed like a reasonable API
# to have.
Example #4
Source File: extensions.py From trax with Apache License 2.0 | 5 votes |
def pmean(tensor, axis_name=None):
    """All-reduce `tensor` across replicas by averaging.

    Args:
      tensor: A tensor.
      axis_name: Name of the axis to reduce over. It has to equal the axis
        name of the surrounding pmap.

    Returns:
      The mean of the `tensor` replicas on each participating device.

    Raises:
      ValueError: If `axis_name` differs from the surrounding pmap's axis
        name, if the surrounding pmap yields no device list, or if the
        devices include TPUs (not supported yet).
    """
    if axis_name != _pmap_config.axis_name():
        message = ("axis_name (%s) is not equal to that of the surrounding "
                   "pmap (%s)")
        raise ValueError(message % (axis_name, _pmap_config.axis_name()))

    replica_devices = _pmap_config.devices()
    if replica_devices is None:
        raise ValueError(
            "Can't retrieve the device list from the surrounding pmap")

    if tpu_devices(replica_devices):
        # TODO(wangpeng): Implement this.
        raise ValueError("pmean for TPU is not supported yet.")

    # Collective "Add" followed by a final "Div" op.
    # NOTE(review): presumably "Div" divides by the group size, which is
    # what yields the mean — confirm against the CollectiveReduce docs.
    return tf.raw_ops.CollectiveReduce(
        input=tensor.data,
        group_size=len(replica_devices),
        group_key=_GROUP_KEY,
        instance_key=_get_instance_key(),
        merge_op="Add",
        final_op="Div",
        subdiv_offsets=(0,))
Example #5
Source File: extensions.py From trax with Apache License 2.0 | 5 votes |
def tpu_devices(devices=None):
    """Filter `devices` down to its TPU entries.

    Args:
      devices: A device list (as a list of strings). If None, the list of all
        available devices is used instead.

    Returns:
      The entries of `devices` that are TPUs, as reported by `find_devices`.
    """
    tpu_only = find_devices("TPU", devices)
    return tpu_only
Example #6
Source File: trainer.py From trax with Apache License 2.0 | 5 votes |
def _train_using_tf(output_dir):
    """Initialize the TPU and run `trainer_lib.train` under TF device scopes.

    Args:
      output_dir: Forwarded to `trainer_lib.train` as `output_dir`.
    """
    host_device = tf_init_tpu()
    with tf.device(host_device):
        if trainer_lib.num_devices() != 1:
            trainer_lib.train(output_dir=output_dir)
            return
        # TF's device priority is GPU > CPU > TPU, so we need to explicitly
        # make the TPU core the default device here.
        with tf.device('/device:TPU:0'):
            trainer_lib.train(output_dir=output_dir)
Example #7
Source File: exporter_lib_v2.py From models with Apache License 2.0 | 5 votes |
def __call__(self, input_tensor):
    """Map `_decode_image` over the input on the CPU, then run inference.

    Args:
      input_tensor: Batch whose elements are each handed to `_decode_image`.

    Returns:
      The result of `self._run_inference_on_images` on the decoded batch.
    """
    with tf.device('cpu:0'):
        decoded_images = tf.map_fn(
            _decode_image,
            elems=input_tensor,
            dtype=tf.uint8,
            parallel_iterations=32,
            back_prop=False,
        )
    # NOTE(review): the flattened source does not show whether this call sat
    # inside the device scope; it is placed outside here — confirm upstream.
    return self._run_inference_on_images(decoded_images)
Example #8
Source File: exporter_lib_v2.py From models with Apache License 2.0 | 5 votes |
def __call__(self, input_tensor):
    """Map `_decode_tf_example` over the input on the CPU, then run inference.

    Args:
      input_tensor: Batch whose elements are each handed to
        `_decode_tf_example`.

    Returns:
      The result of `self._run_inference_on_images` on the decoded batch.
    """
    with tf.device('cpu:0'):
        decoded_images = tf.map_fn(
            _decode_tf_example,
            elems=input_tensor,
            dtype=tf.uint8,
            parallel_iterations=32,
            back_prop=False,
        )
    # NOTE(review): the flattened source does not show whether this call sat
    # inside the device scope; it is placed outside here — confirm upstream.
    return self._run_inference_on_images(decoded_images)
Example #9
Source File: extensions.py From trax with Apache License 2.0 | 4 votes |
def _get_pmap_impl(f, devices, has_tpu):
    """Build the tensor-level implementation used by pmap.

    Args:
      f: A function that takes ndarrays and returns ndarrays.
      devices: A list of strings; the device list.
      has_tpu: Boolean; whether `devices` contains TPU devices.

    Returns:
      A function that takes tensors and returns tensors.
    """
    # Workaround b/121383831
    np_fn = _record_result_type(f) if has_tpu else f

    def tf_f(*tf_args):
        """Tensor-in/tensor-out adapter around the ndarray function."""
        np_result = np_fn(*_tf_to_np(tf_args))
        return _np_to_tf(np_result)

    if has_tpu:

        @tf.function(autograph=False)
        def fn(inputs):
            # TODO(wangpeng): Supply the `device_assignment` argument to
            # tpu.replicate, calculated from `devices`.
            return tf.compat.v1.tpu.replicate(tf_f, inputs)

        return fn

    # This is run in a tf.function so that the various underlying functions
    # can be run in parallel.
    # The trace happens on the client, so any devices should not depend on
    # any side effects.
    jit_tf_f = tf.function(tf_f, autograph=False)

    @tf.function(autograph=False)
    def fn(all_per_device_args):
        """Multi-device function with calls placed on the correct device."""
        per_device_results = []
        for call_args, device_name in zip(all_per_device_args, devices):
            with tf.device(device_name):
                per_device_results.append(jit_tf_f(*call_args))
        return per_device_results

    return fn
Example #10
Source File: continuous_batched.py From compression with Apache License 2.0 | 4 votes |
def compress(self, bottleneck):
    """Compress a floating-point tensor into bit strings.

    `bottleneck` is first quantized as in `quantize()` and then compressed
    using the probability tables derived from `self.prior`. The quantized
    tensor can later be recovered by calling `decompress()`.

    The innermost `self.coding_rank` dimensions form one coding unit, i.e.
    each is compressed into a single string. Any further dimensions to the
    left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed. Must have
        at least `self.coding_rank` dimensions, and the innermost dimensions
        must be broadcastable to `self.prior_shape`.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    full_shape = tf.shape(bottleneck)
    rank = tf.shape(full_shape)[0]
    batch_shape, coding_shape = tf.split(
        full_shape, [rank - self.coding_rank, self.coding_rank])
    # Leading coding dimensions that are not covered by the prior's shape.
    num_broadcast_dims = self.coding_rank - len(self.prior_shape)
    broadcast_shape = coding_shape[:num_broadcast_dims]

    indexes = self._compute_indexes(broadcast_shape)
    if self._quantization_offset is not None:
        bottleneck -= self._quantization_offset
    rounded = tf.round(bottleneck)
    symbols = tf.cast(rounded, tf.int32)
    # Collapse all batch dimensions into one so map_fn walks coding units.
    unit_shape = tf.concat([[-1], coding_shape], 0)
    symbols = tf.reshape(symbols, unit_shape)

    # Prevent tensors from bouncing back and forth between host and GPU.
    # NOTE(review): the flattened source does not show where this device
    # scope ends; it is assumed to cover everything up to the map_fn call —
    # confirm against upstream.
    with tf.device("/cpu:0"):
        cdf = self.cdf
        cdf_length = self.cdf_length
        cdf_offset = self.cdf_offset

        def loop_body(unit_symbols):
            return range_coding_ops.unbounded_index_range_encode(
                unit_symbols,
                indexes,
                cdf,
                cdf_length,
                cdf_offset,
                precision=self.range_coder_precision,
                overflow_width=4,
                debug_level=1,
            )

        # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
        encoded = tf.map_fn(
            loop_body, symbols, dtype=tf.string, name="compress")

    encoded = tf.reshape(encoded, batch_shape)
    return encoded
Example #11
Source File: continuous_batched.py From compression with Apache License 2.0 | 4 votes |
def decompress(self, strings, broadcast_shape):
    """Decompress bit strings back into the quantized tensor.

    Reconstructs the quantized tensor from bit strings produced by
    `compress()`. Part of the output shape has to be supplied in
    `broadcast_shape`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      broadcast_shape: Iterable of ints. The part of the output tensor shape
        between the shape of `strings` on the left and `self.prior_shape` on
        the right. This must match the shape of the input to `compress()`.

    Returns:
      A `tf.Tensor` of shape
      `strings.shape + broadcast_shape + self.prior_shape`.
    """
    strings = tf.convert_to_tensor(strings, dtype=tf.string)
    broadcast_shape = tf.convert_to_tensor(broadcast_shape, dtype=tf.int32)
    batch_shape = tf.shape(strings)
    output_shape = tf.concat(
        [batch_shape, broadcast_shape, self.prior_shape], 0)
    indexes = self._compute_indexes(broadcast_shape)
    # One string per coding unit, so flatten the batch dimensions for map_fn.
    flat_strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    # NOTE(review): the flattened source does not show where this device
    # scope ends; it is assumed to cover everything up to the map_fn call —
    # confirm against upstream.
    with tf.device("/cpu:0"):
        cdf = self.cdf
        cdf_length = self.cdf_length
        cdf_offset = self.cdf_offset

        def loop_body(string):
            return range_coding_ops.unbounded_index_range_decode(
                string,
                indexes,
                cdf,
                cdf_length,
                cdf_offset,
                precision=self.range_coder_precision,
                overflow_width=4,
                debug_level=1,
            )

        # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
        decoded = tf.map_fn(
            loop_body, flat_strings, dtype=tf.int32, name="decompress")

    decoded = tf.reshape(decoded, output_shape)
    outputs = tf.cast(decoded, self.dtype)
    # Add back the offset that `compress()` subtracted before rounding.
    if self._quantization_offset is not None:
        outputs += self._quantization_offset
    return outputs
Example #12
Source File: continuous_indexed.py From compression with Apache License 2.0 | 4 votes |
def compress(self, bottleneck, indexes):
    """Compress a floating-point tensor into bit strings.

    `bottleneck` is first quantized as in `quantize()` and then compressed
    using the probability tables derived from `indexes`. The quantized tensor
    can later be recovered by calling `decompress()`.

    The innermost `self.coding_rank` dimensions form one coding unit, i.e.
    each is compressed into a single string. Any further dimensions to the
    left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)
    full_shape = tf.shape(flat_indexes)
    batch_shape = full_shape[:-self.coding_rank]
    # Collapse all batch dimensions into one so map_fn walks coding units.
    unit_shape = tf.concat([[-1], full_shape[-self.coding_rank:]], 0)
    flat_indexes = tf.reshape(flat_indexes, unit_shape)

    prior = self._make_prior(indexes)
    offset = helpers.quantization_offset(prior)
    centered = bottleneck - offset
    symbols = tf.cast(tf.round(centered), tf.int32)
    symbols = tf.reshape(symbols, unit_shape)

    # Prevent tensors from bouncing back and forth between host and GPU.
    # NOTE(review): the flattened source does not show where this device
    # scope ends; it is assumed to cover everything up to the map_fn call —
    # confirm against upstream.
    with tf.device("/cpu:0"):
        cdf = self.cdf
        cdf_length = self.cdf_length
        cdf_offset = self.cdf_offset

        def loop_body(args):
            # `args` pairs one coding unit's symbols with its indexes.
            unit_symbols = args[0]
            unit_indexes = args[1]
            return range_coding_ops.unbounded_index_range_encode(
                unit_symbols,
                unit_indexes,
                cdf,
                cdf_length,
                cdf_offset,
                precision=self.range_coder_precision,
                overflow_width=4,
                debug_level=1,
            )

        # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
        encoded = tf.map_fn(
            loop_body,
            (symbols, flat_indexes),
            dtype=tf.string,
            name="compress",
        )

    encoded = tf.reshape(encoded, batch_shape)
    return encoded
Example #13
Source File: continuous_indexed.py From compression with Apache License 2.0 | 4 votes |
def decompress(self, strings, indexes):
    """Decompress bit strings back into the quantized tensor.

    Reconstructs the quantized tensor from bit strings produced by
    `compress()`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      indexes: `tf.Tensor` specifying the scalar distribution for each output
        element. See class docstring for examples.

    Returns:
      A `tf.Tensor` of the same shape as `indexes` (without the optional
      channel dimension).
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)
    output_shape = tf.shape(flat_indexes)
    # Collapse all batch dimensions into one so map_fn walks coding units.
    unit_shape = tf.concat([[-1], output_shape[-self.coding_rank:]], 0)
    flat_indexes = tf.reshape(flat_indexes, unit_shape)
    flat_strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    # NOTE(review): the flattened source does not show where this device
    # scope ends; it is assumed to cover everything up to the map_fn call —
    # confirm against upstream.
    with tf.device("/cpu:0"):
        cdf = self.cdf
        cdf_length = self.cdf_length
        cdf_offset = self.cdf_offset

        def loop_body(args):
            # `args` pairs one compressed string with its unit's indexes.
            unit_string = args[0]
            unit_indexes = args[1]
            return range_coding_ops.unbounded_index_range_decode(
                unit_string,
                unit_indexes,
                cdf,
                cdf_length,
                cdf_offset,
                precision=self.range_coder_precision,
                overflow_width=4,
                debug_level=1,
            )

        # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
        decoded = tf.map_fn(
            loop_body,
            (flat_strings, flat_indexes),
            dtype=tf.int32,
            name="decompress",
        )

    decoded = tf.reshape(decoded, output_shape)
    prior = self._make_prior(indexes)
    offset = helpers.quantization_offset(prior)
    return tf.cast(decoded, self.dtype) + offset