Python tensorflow.colocate_with() Examples

The following are code examples of tensorflow.colocate_with(), collected from open-source projects. Each example notes the project and source file it comes from, so you can follow it back to its full context. tf.colocate_with(op) is a TensorFlow 1.x graph-construction context manager: ops created inside the context receive a colocation constraint that forces them onto the same device as the given op or tensor.
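As a starting point, here is a minimal, self-contained sketch of the colocation constraint itself (assuming TensorFlow 1.x; under TensorFlow 2.x the same calls live under tf.compat.v1 with eager execution disabled):

import tensorflow as tf

with tf.Graph().as_default():
  with tf.device("/cpu:0"):
    v = tf.get_variable("v", shape=[2, 2], initializer=tf.zeros_initializer())
  # Ops created inside the context are constrained to the same device as `v`.
  with tf.colocate_with(v):
    doubled = tf.multiply(v, 2.0, name="doubled")
  # The constraint is recorded on the op and resolved at placement time.
  print(doubled.op.colocation_groups())  # [b'loc:@v']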
Example #1
Source File: native_module.py    From hub with Apache License 2.0
def find_signature_inputs_from_multivalued_ops(inputs):
  """Returns error message for module inputs from ops with multiple outputs."""
  dense_inputs = []  # List of (str, Tensor), with SparseTensors decomposed.
  for name, tensor in sorted(inputs.items()):
    if isinstance(tensor, tf.SparseTensor):
      dense_inputs.extend(("%s.%s" % (name, attr), getattr(tensor, attr))
                          for attr in ("indices", "values", "dense_shape"))
    else:
      dense_inputs.append((name, tensor))
  warnings = [(name, tensor.name) for name, tensor in dense_inputs
              if len(tensor.op.outputs) != 1]
  if warnings:
    return (
        "WARNING: The inputs declared in hub.add_signature() should be tensors "
        "from ops with a single output, or else uses of tf.colocate_with() on "
        "that op can trigger fatal errors when the module is applied and "
        "colocation constraints have to be rewritten.\nAffected inputs: %s" %
        ", ".join("%s='%s'" % pair for pair in warnings))
  return None 
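The check above flags inputs whose producing op has more than one output. A hypothetical illustration of that condition (tf.split is one such multi-output op; this sketch does not use hub itself):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 8], name="x")
# tf.split creates a single op with two outputs; each half is one output of that op.
left, right = tf.split(x, num_or_size_splits=2, axis=1)
print(len(left.op.outputs))  # 2 -> would be listed in the warning above
print(len(x.op.outputs))     # 1 -> a plain placeholder passes the check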
Example #2
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def concat_all_device_tensors(self, all_device_tensors):
    """For each device, concatenate the device's tensors into a single tensor.

    Args:
      all_device_tensors: A list of list of tensors. `all_device_tensors[i][j]`
        is a tensor where `i` is the device index and `j` is the tensor index.

    Returns:
      A list of list of tensors in a similar form as all_device_tensors, except
      the tensors on each device have been concatenated. Each inner list
      consists of a single concatenated tensor.
    """
    assert self._next_method == 'concat'
    new_all_device_tensors = []
    tensor_states = []
    for device_tensors in all_device_tensors:
      with tf.colocate_with(device_tensors[0]):
        concat_tensor, tensor_state = self._concat_tensors(device_tensors)
        new_all_device_tensors.append([concat_tensor])
        tensor_states.append(tensor_state)
    self._tensor_states = tensor_states
    self._next_method = 'split'
    return new_all_device_tensors 
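The method relies on class-internal helpers (self._concat_tensors and the concat/split state machine), so it is not runnable in isolation. Below is a standalone sketch of the same colocation pattern, with a made-up concat step in place of the class internals:

import tensorflow as tf

def concat_per_device(all_device_tensors):
  """Concatenates each device's tensors next to where they already live."""
  new_all_device_tensors = []
  for device_tensors in all_device_tensors:
    # Keep the concat on the device of the first tensor instead of copying
    # every tensor to wherever the concat op would otherwise be placed.
    with tf.colocate_with(device_tensors[0]):
      flat = [tf.reshape(t, [-1]) for t in device_tensors]
      new_all_device_tensors.append([tf.concat(flat, axis=0)])
  return new_all_device_tensors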
Example #3
Source File: layer_collection.py    From kfac with Apache License 2.0
def eval_losses(self, target_mode="data", coeff_mode="regular"):
    """Returns evaluated losses (colocated with inputs to losses)."""
    evals = []
    for loss in self.losses:
      with tf.colocate_with(self.loss_colocation_ops[loss]):
        if target_mode == "data":
          loss_value = loss.evaluate()
        elif target_mode == "sample":
          loss_value = loss.evaluate_on_sample()
        else:
          raise ValueError("target_mode must be in ['data', 'sample']")

        if coeff_mode == "regular":
          multiplier = self.loss_coeffs[loss]
        elif coeff_mode == "sqrt":
          multiplier = tf.sqrt(self.loss_coeffs[loss])
        elif coeff_mode == "off":
          multiplier = 1.0
        else:
          raise ValueError("coeff_mode must be in ['regular', 'sqrt', 'off']")
        multiplier = tf.cast(multiplier, dtype=loss_value.dtype)
        evals.append(multiplier * loss_value)
    return evals 
Example #4
Source File: estimator.py    From kfac with Apache License 2.0
def _get_transformed_random_signs(self):
    if self.mat_type == "Fisher":
      mult_func = lambda loss, index: loss.multiply_fisher_factor(index)
      inner_shape_func = lambda loss: loss.fisher_factor_inner_shape
    elif self.mat_type == "GGN":
      mult_func = lambda loss, index: loss.multiply_ggn_factor(index)
      inner_shape_func = lambda loss: loss.ggn_factor_inner_shape

    transformed_random_signs = []
    for loss in self.layers.losses:
      with tf.colocate_with(self.layers.loss_colocation_ops[loss]):
        value = mult_func(loss,
                          utils.generate_random_signs(inner_shape_func(loss),
                                                      dtype=loss.dtype))
        coeff = tf.cast(self.layers.loss_coeffs[loss], dtype=value.dtype)
        transformed_random_signs.append(tf.sqrt(coeff) * value)
    return transformed_random_signs 
Example #5
Source File: discretization.py    From BERT with Apache License 2.0
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
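ema_means is presumably colocated with means because the exponential-moving-average update reads and writes both codebooks together; keeping them on one device avoids shuttling the table around on every step. A hedged sketch of such an update, reusing get_vq_codebook from above (the decay value and the per-batch statistics are invented for illustration):

import tensorflow as tf

means, ema_means, ema_count = get_vq_codebook(codebook_size=256, hidden_size=64)
decay = 0.999
batch_count = tf.zeros([256])    # stand-in for per-code assignment counts from a batch
batch_sum = tf.zeros([256, 64])  # stand-in for per-code summed encoder outputs
update_count = ema_count.assign(decay * ema_count + (1.0 - decay) * batch_count)
update_means = ema_means.assign(decay * ema_means + (1.0 - decay) * batch_sum)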
Example #6
Source File: multistep_optimizer.py    From training_results_v0.5 with Apache License 2.0
def _finish(self, update_ops, name_scope):
    """Updates beta_power variables every n batches and incrs counter."""
    iter_ = self._get_iter_variable()
    beta1_power, beta2_power = self._get_beta_accumulators()
    with tf.control_dependencies(update_ops):
      with tf.colocate_with(iter_):

        def update_beta_op():
          update_beta1 = beta1_power.assign(
              beta1_power * self._beta1_t,
              use_locking=self._use_locking)
          update_beta2 = beta2_power.assign(
              beta2_power * self._beta2_t,
              use_locking=self._use_locking)
          return tf.group(update_beta1, update_beta2)
        maybe_update_beta = tf.cond(
            tf.equal(iter_, 0), update_beta_op, tf.no_op)
        with tf.control_dependencies([maybe_update_beta]):
          update_iter = iter_.assign(tf.mod(iter_ + 1, self._n_t),
                                     use_locking=self._use_locking)
    return tf.group(
        *update_ops + [update_iter, maybe_update_beta], name=name_scope) 
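A plain-Python sketch of the counter logic (an illustration of the apparent intent, not the optimizer itself): with n accumulation steps, the beta-power update fires once per n batches while the counter cycles modulo n.

n = 3
iter_ = 0
for step in range(7):
  update_betas = (iter_ == 0)   # mirrors tf.cond(tf.equal(iter_, 0), update_beta_op, tf.no_op)
  iter_ = (iter_ + 1) % n       # mirrors iter_.assign(tf.mod(iter_ + 1, self._n_t))
  print(step, update_betas, iter_)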
Example #7
Source File: discretization.py    From training_results_v0.5 with Apache License 2.0
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
Example #8
Source File: transformer_nat.py    From BERT with Apache License 2.0
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)

  return means, ema_means, ema_count 
Example #9
Source File: multistep_optimizer.py    From BERT with Apache License 2.0
def _finish(self, update_ops, name_scope):
    """Updates beta_power variables every n batches and incrs counter."""
    iter_ = self._get_iter_variable()
    beta1_power, beta2_power = self._get_beta_accumulators()
    with tf.control_dependencies(update_ops):
      with tf.colocate_with(iter_):

        def update_beta_op():
          update_beta1 = beta1_power.assign(
              beta1_power * self._beta1_t,
              use_locking=self._use_locking)
          update_beta2 = beta2_power.assign(
              beta2_power * self._beta2_t,
              use_locking=self._use_locking)
          return tf.group(update_beta1, update_beta2)
        maybe_update_beta = tf.cond(
            tf.equal(iter_, 0), update_beta_op, tf.no_op)
        with tf.control_dependencies([maybe_update_beta]):
          update_iter = iter_.assign(tf.mod(iter_ + 1, self._n_t),
                                     use_locking=self._use_locking)
    return tf.group(
        *update_ops + [update_iter, maybe_update_beta], name=name_scope) 
Example #10
Source File: native_module.py    From hub with Apache License 2.0
def find_state_op_colocation_error(graph, reported_tags=None):
  """Returns error message for colocation of state ops, or None if ok."""
  state_op_types = list_registered_stateful_ops_without_inputs(
      graph.as_graph_def())
  state_op_map = {op.name: op for op in graph.get_operations()
                  if op.type in state_op_types}
  for op in state_op_map.values():
    for colocation_group in op.colocation_groups():
      if not (colocation_group.startswith(tf.compat.as_bytes("loc:@")) and
              tf.compat.as_str_any(colocation_group[5:]) in state_op_map):
        tags_prefix = ("" if reported_tags is None else
                       "in the graph for tags %s, " % reported_tags)
        return (
            "A state-holding node x of a module's graph (e.g., a Variable op) "
            "must not be subject to a tf.colocate_with(y) constraint "
            "unless y is also a state-holding node.\n"
            "Details: %snode '%s' has op '%s', which counts as state-holding, "
            "but Operation.colocation_groups() == %s. " %
            (tags_prefix, op.name, op.type, op.colocation_groups()))
  return None 
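A hypothetical graph that this check would reject: a variable (state-holding) created under a colocation constraint that points at a plain Add op (not state-holding). Sketch only, TF 1.x graph mode:

import tensorflow as tf

g = tf.Graph()
with g.as_default():
  a = tf.placeholder(tf.float32, name="a")
  total = tf.add(a, 1.0, name="total")
  with tf.colocate_with(total):
    v = tf.get_variable("v", shape=[], initializer=tf.zeros_initializer())
  print(v.op.colocation_groups())          # [b'loc:@total'] -> 'total' is not a state op
  print(find_state_op_colocation_error(g))  # -> returns the error message above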
Example #11
Source File: discretization.py    From fine-lm with MIT License
def get_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[bottleneck_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[bottleneck_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
Example #12
Source File: transformer_nat.py    From fine-lm with MIT License
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)

  return means, ema_means, ema_count 
Example #13
Source File: multistep_optimizer.py    From fine-lm with MIT License
def _finish(self, update_ops, name_scope):
    """Updates beta_power variables every n batches and incrs counter."""
    iter_ = self._get_iter_variable()
    beta1_power, beta2_power = self._get_beta_accumulators()
    with tf.control_dependencies(update_ops):
      with tf.colocate_with(iter_):

        def update_beta_op():
          update_beta1 = beta1_power.assign(
              beta1_power * self._beta1_t,
              use_locking=self._use_locking)
          update_beta2 = beta2_power.assign(
              beta2_power * self._beta2_t,
              use_locking=self._use_locking)
          return tf.group(update_beta1, update_beta2)
        maybe_update_beta = tf.cond(
            tf.equal(iter_, 0), update_beta_op, tf.no_op)
        with tf.control_dependencies([maybe_update_beta]):
          update_iter = iter_.assign(tf.mod(iter_ + 1, self._n_t),
                                     use_locking=self._use_locking)
    return tf.group(
        *update_ops + [update_iter, maybe_update_beta], name=name_scope) 
Example #14
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def _apply_to_all_device_tensors(all_device_tensors, apply_func, colocate=True):
  """Applies a function to each tensor in `all_device_tensors`.

  A new list of lists of tensors is returned, where every tensor in
  `all_device_tensors` has had `apply_func` called on it. `all_device_tensors`
  is not modified.

  Args:
    all_device_tensors: A list of list of tensors. `all_device_tensors[i][j]` is
      a tensor where `i` is the device index and `j` is the tensor index.
    apply_func: A function taking in three arguments: tensor, device_index,
      tensor_index, and returning a modified tensor.
      `tensor` is `all_device_tensors[device_index][tensor_index]`.
    colocate: If True, apply_func will be run under a context manager colocated
      with its input tensor.
  Returns:
    A list in the same form as `all_device_tensors`, except each tensor has had
    `apply_func` called on it.
  """
  new_all_device_tensors = []
  for device_index, device_tensors in enumerate(all_device_tensors):
    new_device_tensors = []
    for tensor_index, t in enumerate(device_tensors):
      if colocate:
        with tf.colocate_with(t):
          new_t = apply_func(t, device_index, tensor_index)
      else:
        new_t = apply_func(t, device_index, tensor_index)
      new_device_tensors.append(new_t)
    new_all_device_tensors.append(new_device_tensors)
  return new_all_device_tensors 
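A hedged usage sketch of this helper: compacting every tensor to float16. The nested [device][tensor] layout follows the docstring; the tensors themselves are made up.

import tensorflow as tf

all_device_tensors = [
    [tf.ones([4], tf.float32), tf.ones([2, 2], tf.float32)],  # "device 0"
    [tf.ones([4], tf.float32), tf.ones([2, 2], tf.float32)],  # "device 1"
]

def to_fp16(tensor, device_index, tensor_index):
  return tf.cast(tensor, tf.float16)

compact = _apply_to_all_device_tensors(all_device_tensors, to_fp16)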
Example #15
Source File: multistep_optimizer.py    From training_results_v0.5 with Apache License 2.0
def _create_slots(self, var_list):
    """Create slot variables for Adam with accumulated gradients."""
    super(MultistepAdamOptimizer, self)._create_slots(var_list)
    first_var = min(var_list, key=lambda x: x.name)
    self._create_non_slot_variable(initial_value=0 if self._n == 1 else 1,
                                   name="iter",
                                   colocate_with=first_var)
    for v in var_list:
      self._zeros_slot(v, "grad_acc", self._name) 
Example #16
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def split_all_device_tensors(self, all_device_tensors):
    """Splits concatenated tensors into `num_splits` pieces.

    `num_splits` is specified in the constructor.  In the case where the total
    size of a concatenated tensor is not divisible by `num_splits`, the last
    split tensor gets more elements.

    Args:
      all_device_tensors: A list of list of tensors. `all_device_tensors[i][j]`
        is a tensor where `i` is the device index and `j` is the tensor index.
        For each i, `all_device_tensors[i]` must be a list of length 1 of a
        single concatenated tensor.

    Returns:
      A list of list of tensors in a similar form as all_device_tensors, except
      the concatenated tensor on each device has been split. Each inner list
      is a list of length `num_splits`.
    """
    assert self._next_method == 'split'
    new_all_device_tensors = []
    for [concat_tensor] in all_device_tensors:
      with tf.colocate_with(concat_tensor):
        new_all_device_tensors.append(self._split_tensors(concat_tensor))
    self._orig_concat_all_device_tensors = all_device_tensors
    self._next_method = 'undo_split'
    return new_all_device_tensors 
Example #17
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def undo_split_all_device_tensors(self, all_device_tensors):
    """Undoes the effects of `split_all_device_tensors`."""
    assert self._next_method == 'undo_split'
    new_all_device_tensors = []
    for i, device_tensors in enumerate(all_device_tensors):
      [orig_tensor] = self._orig_concat_all_device_tensors[i]
      with tf.colocate_with(orig_tensor):
        new_all_device_tensors.append(
            [self._undo_split_tensors(device_tensors)])
    self._next_method = 'undo_concat'
    return new_all_device_tensors 
Example #18
Source File: vq_discrete.py    From training_results_v0.5 with Apache License 2.0
def __init__(self, hparams):
    self.hparams = hparams
    print ("self.hparams.z_size", self.hparams.z_size)
    # Set the discretization bottleneck specific things here
    self.hparams.z_size_per_residual = self.hparams.z_size // \
                                       self.hparams.num_residuals
    print ("self.hparams.num_residuals", self.hparams.num_residuals)
    self.hparams.block_dim = int(
        self.hparams.hidden_size // self.hparams.num_blocks)
    self.hparams.block_v_size = 2**(
        self.hparams.z_size_per_residual / self.hparams.num_blocks)
    self.hparams.block_v_size = int(self.hparams.block_v_size)
    self.means = tf.get_variable(
        name="means",
        shape=[
            self.hparams.num_blocks, self.hparams.block_v_size,
            self.hparams.block_dim
        ],
        initializer=tf.initializers.variance_scaling(distribution="uniform"))

    # Create the shadow variables if we are using EMA
    if self.hparams.ema:
      self.ema_count = tf.get_variable(
          "ema_count", [self.hparams.num_blocks, self.hparams.block_v_size],
          initializer=tf.constant_initializer(0),
          trainable=False)
      with tf.colocate_with(self.means):
        self.ema_means = tf.get_variable(
            "ema_means",
            initializer=self.means.initialized_value(),
            trainable=False) 
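A small worked example (plain Python, made-up hparams) of the bottleneck sizing arithmetic above:

z_size, num_residuals, num_blocks, hidden_size = 16, 2, 2, 512
z_size_per_residual = z_size // num_residuals                # 8 bits per residual
block_dim = hidden_size // num_blocks                        # 256 dimensions per block
block_v_size = int(2 ** (z_size_per_residual / num_blocks))  # 2**4 = 16 codes per block
print(z_size_per_residual, block_dim, block_v_size)          # 8 256 16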
Example #19
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def undo_concat_all_device_tensors(self, all_device_tensors):
    """Undoes the effects of `concat_all_device_tensors`."""
    assert self._next_method == 'undo_concat'
    new_all_device_tensors = []
    for [concat_tensor], tensor_state in zip(all_device_tensors,
                                             self._tensor_states):
      with tf.colocate_with(concat_tensor):
        new_all_device_tensors.append(self._undo_concat_tensors(concat_tensor,
                                                                tensor_state))
    self._next_method = None
    return new_all_device_tensors 
Example #20
Source File: variable_mgr.py    From training_results_v0.5 with Apache License 2.0
def preprocess_device_grads(self, device_grads):
    compact_grads = (self.benchmark_cnn.params.use_fp16 and
                     self.benchmark_cnn.params.compact_gradient_transfer)
    defer_grads = (self.benchmark_cnn.params.variable_consistency == 'relaxed')

    grads_to_reduce = [[g for g, _ in grad_vars] for grad_vars in device_grads]
    algorithm = batch_allreduce.algorithm_from_params(self.benchmark_cnn.params)
    reduced_grads, self._warmup_ops = algorithm.batch_all_reduce(
        grads_to_reduce, self.benchmark_cnn.params.gradient_repacking,
        compact_grads, defer_grads)
    assert not self._warmup_ops
    if (self.benchmark_cnn.params.use_fp16 and
        self.benchmark_cnn.enable_auto_loss_scale):
      # Check for infs or nans
      is_finite_list = []
      with tf.name_scope('check_for_inf_and_nan'):
        for tower_grads in reduced_grads:
          with tf.colocate_with(tower_grads[0]):
            # TODO(tanmingxing): Create fused op that takes in a list of tensors
            # as input and returns scalar boolean True if there are any
            # infs/nans.
            is_finite_list.append(tf.reduce_all(
                [tf.reduce_all(tf.is_finite(g)) for g in tower_grads]))
        self.grad_has_inf_nan = tf.logical_not(tf.reduce_all(is_finite_list))
    reduced_device_grads = [[
        (g, v) for g, (_, v) in zip(grads, grad_vars)
    ] for grads, grad_vars in zip(reduced_grads, device_grads)]
    return self.benchmark_cnn.devices, reduced_device_grads 
Example #21
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def _undo_compact_all_device_tensors(all_device_tensors,
                                     orig_all_device_tensors):
  """Uncompacts each tensor by casting to it's original dtype."""
  def apply_func(tensor, device_index, tensor_index):
    orig_tensor = orig_all_device_tensors[device_index][tensor_index]
    with tf.colocate_with(orig_tensor):
      return tf.cast(tensor, orig_tensor.dtype)
  return _apply_to_all_device_tensors(all_device_tensors, apply_func,
                                      colocate=False) 
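A hedged round-trip sketch combining this with _apply_to_all_device_tensors from earlier: compact to float16, then restore the original dtypes. colocate=False in the undo step because apply_func already opens its own tf.colocate_with(orig_tensor) context.

import tensorflow as tf

orig = [[tf.ones([3], tf.float32)], [tf.ones([3], tf.float32)]]
compact = _apply_to_all_device_tensors(orig, lambda t, di, ti: tf.cast(t, tf.float16))
restored = _undo_compact_all_device_tensors(compact, orig)  # back to float32, colocated with orig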
Example #22
Source File: math_utils.py    From training_results_v0.5 with Apache License 2.0
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Custom version of tf.clip_by_global_norm that doesn't check numerics."""
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = tf.global_norm(t_list, name)

  with tf.name_scope(name, "clip_by_global_norm", t_list + [clip_norm]) as name:
    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    scale = clip_norm * tf.minimum(
        1.0 / use_norm,
        tf.constant(1.0, dtype=use_norm.dtype) / clip_norm)

    values = [
        tf.convert_to_tensor(
            t.values if isinstance(t, tf.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]

    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
      else:
        with tf.colocate_with(v):
          values_clipped.append(
              tf.identity(v * scale, name="%s_%d" % (name, i)))

    list_clipped = [
        tf.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, tf.IndexedSlices)
        else c_v
        for (c_v, t) in zip(values_clipped, t_list)]

  return list_clipped, use_norm 
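A hedged usage sketch (TF 1.x graph mode; collections and six are assumed to be imported alongside tensorflow in the excerpt's module):

import tensorflow as tf

grads = [tf.constant([3.0, 4.0]), tf.constant([[1.0, 2.0], [2.0, 1.0]])]
clipped, norm = clip_by_global_norm(grads, clip_norm=1.0)
# Each gradient is scaled by min(1, clip_norm / global_norm), and each scaling op is
# colocated with the gradient it rescales.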
Example #23
Source File: native_module.py    From hub with Apache License 2.0
def find_signature_input_colocation_error(signature_name, inputs):
  """Returns error message for colocation of signature inputs, or None if ok."""
  for input_name, tensor in inputs.items():
    expected_colocation_groups = [tf.compat.as_bytes("loc:@" + tensor.op.name)]
    if tensor.op.colocation_groups() != expected_colocation_groups:
      return (
          "A tensor x used as input in a signature must not be subject to a "
          "tf.colocate_with(y) constraint. (The reverse would be allowed.)\n"
          "Details: tensor '%s' appears as input '%s' of signature '%s' "
          "but has Tensor.op.colocation_groups() == %s" %
          (tensor, input_name, signature_name, tensor.op.colocation_groups()))
  return None 
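A hypothetical illustration of the condition being checked: a tensor whose op carries a colocation constraint pointing at some other op would be rejected as a signature input.

import tensorflow as tf

anchor = tf.constant([1.0], name="anchor")
with tf.colocate_with(anchor):
  bad_input = tf.constant([2.0], name="bad_input")
print(bad_input.op.colocation_groups())  # [b'loc:@anchor'], not [b'loc:@bad_input']
print(find_signature_input_colocation_error("sig", {"x": bad_input}))  # -> error message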
Example #24
Source File: curvature_matrix_vector_products.py    From kfac with Apache License 2.0
def _multiply_across_losses(self, mult_func, vecs, coeff_mode="regular"):
    products = []
    for loss, vec in zip(self._losses, vecs):
      with tf.colocate_with(self._loss_colocation_ops[loss]):
        if coeff_mode == "regular":
          multiplier = self._get_loss_coeff(loss)
        elif coeff_mode == "sqrt":
          multiplier = tf.sqrt(self._get_loss_coeff(loss))
        val = mult_func(loss, vec)
        products.append(tf.cast(multiplier, dtype=val.dtype) * val)
    return tuple(products)