Python tensorflow.colocate_with() Examples

The following are code examples of tensorflow.colocate_with(), collected from open-source projects. Each example notes the project and source file it comes from, so you can follow it back to its full context. tf.colocate_with(op) is a TensorFlow 1.x graph-construction context manager: ops created inside the context receive a colocation constraint that forces them onto the same device as the given op or tensor.
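As a starting point, here is a minimal, self-contained sketch of the colocation constraint itself (assuming TensorFlow 1.x; under TensorFlow 2.x the same calls live under tf.compat.v1 with eager execution disabled):

import tensorflow as tf

with tf.Graph().as_default():
  with tf.device("/cpu:0"):
    v = tf.get_variable("v", shape=[2, 2], initializer=tf.zeros_initializer())
  # Ops created inside the context are constrained to the same device as `v`.
  with tf.colocate_with(v):
    doubled = tf.multiply(v, 2.0, name="doubled")
  # The constraint is recorded on the op and resolved at placement time.
  print(doubled.op.colocation_groups())  # [b'loc:@v']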
Example #1
Source File: native_module.py    From hub with Apache License 2.0
def find_signature_inputs_from_multivalued_ops(inputs):
  """Returns error message for module inputs from ops with multiple outputs."""
  dense_inputs = []  # List of (str, Tensor), with SparseTensors decomposed.
  for name, tensor in sorted(inputs.items()):
    if isinstance(tensor, tf.SparseTensor):
      dense_inputs.extend(("%s.%s" % (name, attr), getattr(tensor, attr))
                          for attr in ("indices", "values", "dense_shape"))
    else:
      dense_inputs.append((name, tensor))
  warnings = [(name, tensor.name) for name, tensor in dense_inputs
              if len(tensor.op.outputs) != 1]
  if warnings:
    return (
        "WARNING: The inputs declared in hub.add_signature() should be tensors "
        "from ops with a single output, or else uses of tf.colocate_with() on "
        "that op can trigger fatal errors when the module is applied and "
        "colocation constraints have to be rewritten.\nAffected inputs: %s" %
        ", ".join("%s='%s'" % pair for pair in warnings))
  return None 
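The check above flags inputs whose producing op has more than one output. A hypothetical illustration of that condition (tf.split is one such multi-output op; this sketch does not use hub itself):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 8], name="x")
# tf.split creates a single op with two outputs; each half is one output of that op.
left, right = tf.split(x, num_or_size_splits=2, axis=1)
print(len(left.op.outputs))  # 2 -> would be listed in the warning above
print(len(x.op.outputs))     # 1 -> a plain placeholder passes the check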
Example #2
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def concat_all_device_tensors(self, all_device_tensors):
    """For each device, concatenate the device's tensors into a single tensor.

    Args:
      all_device_tensors: A list of list of tensors. `all_device_tensors[i][j]`
        is a tensor where `i` is the device index and `j` is the tensor index.

    Returns:
      A list of list of tensors in a similar form as all_device_tensors, except
      the tensors on each device have been concatenated. Each inner list
      consists of a single concatenated tensor.
    """
    assert self._next_method == 'concat'
    new_all_device_tensors = []
    tensor_states = []
    for device_tensors in all_device_tensors:
      with tf.colocate_with(device_tensors[0]):
        concat_tensor, tensor_state = self._concat_tensors(device_tensors)
        new_all_device_tensors.append([concat_tensor])
        tensor_states.append(tensor_state)
    self._tensor_states = tensor_states
    self._next_method = 'split'
    return new_all_device_tensors 
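The method relies on class-internal helpers (self._concat_tensors and the concat/split state machine), so it is not runnable in isolation. Below is a standalone sketch of the same colocation pattern, with a made-up concat step in place of the class internals:

import tensorflow as tf

def concat_per_device(all_device_tensors):
  """Concatenates each device's tensors next to where they already live."""
  new_all_device_tensors = []
  for device_tensors in all_device_tensors:
    # Keep the concat on the device of the first tensor instead of copying
    # every tensor to wherever the concat op would otherwise be placed.
    with tf.colocate_with(device_tensors[0]):
      flat = [tf.reshape(t, [-1]) for t in device_tensors]
      new_all_device_tensors.append([tf.concat(flat, axis=0)])
  return new_all_device_tensors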
Example #3
Source File: layer_collection.py    From kfac with Apache License 2.0
def eval_losses(self, target_mode="data", coeff_mode="regular"):
    """Returns evaluated losses (colocated with inputs to losses)."""
    evals = []
    for loss in self.losses:
      with tf.colocate_with(self.loss_colocation_ops[loss]):
        if target_mode == "data":
          loss_value = loss.evaluate()
        elif target_mode == "sample":
          loss_value = loss.evaluate_on_sample()
        else:
          raise ValueError("target_mode must be in ['data', 'sample']")

        if coeff_mode == "regular":
          multiplier = self.loss_coeffs[loss]
        elif coeff_mode == "sqrt":
          multiplier = tf.sqrt(self.loss_coeffs[loss])
        elif coeff_mode == "off":
          multiplier = 1.0
        else:
          raise ValueError("coeff_mode must be in ['regular', 'sqrt', 'off']")
        multiplier = tf.cast(multiplier, dtype=loss_value.dtype)
        evals.append(multiplier * loss_value)
    return evals 
Example #4
Source File: estimator.py    From kfac with Apache License 2.0
def _get_transformed_random_signs(self):
    if self.mat_type == "Fisher":
      mult_func = lambda loss, index: loss.multiply_fisher_factor(index)
      inner_shape_func = lambda loss: loss.fisher_factor_inner_shape
    elif self.mat_type == "GGN":
      mult_func = lambda loss, index: loss.multiply_ggn_factor(index)
      inner_shape_func = lambda loss: loss.ggn_factor_inner_shape

    transformed_random_signs = []
    for loss in self.layers.losses:
      with tf.colocate_with(self.layers.loss_colocation_ops[loss]):
        value = mult_func(loss,
                          utils.generate_random_signs(inner_shape_func(loss),
                                                      dtype=loss.dtype))
        coeff = tf.cast(self.layers.loss_coeffs[loss], dtype=value.dtype)
        transformed_random_signs.append(tf.sqrt(coeff) * value)
    return transformed_random_signs 
Example #5
Source File: discretization.py    From BERT with Apache License 2.0
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
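ema_means is presumably colocated with means because the exponential-moving-average update reads and writes both codebooks together; keeping them on one device avoids shuttling the table around on every step. A hedged sketch of such an update, reusing get_vq_codebook from above (the decay value and the per-batch statistics are invented for illustration):

import tensorflow as tf

means, ema_means, ema_count = get_vq_codebook(codebook_size=256, hidden_size=64)
decay = 0.999
batch_count = tf.zeros([256])    # stand-in for per-code assignment counts from a batch
batch_sum = tf.zeros([256, 64])  # stand-in for per-code summed encoder outputs
update_count = ema_count.assign(decay * ema_count + (1.0 - decay) * batch_count)
update_means = ema_means.assign(decay * ema_means + (1.0 - decay) * batch_sum)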
Example #6
Source File: multistep_optimizer.py    From training_results_v0.5 with Apache License 2.0
def _finish(self, update_ops, name_scope):
    """Updates beta_power variables every n batches and incrs counter."""
    iter_ = self._get_iter_variable()
    beta1_power, beta2_power = self._get_beta_accumulators()
    with tf.control_dependencies(update_ops):
      with tf.colocate_with(iter_):

        def update_beta_op():
          update_beta1 = beta1_power.assign(
              beta1_power * self._beta1_t,
              use_locking=self._use_locking)
          update_beta2 = beta2_power.assign(
              beta2_power * self._beta2_t,
              use_locking=self._use_locking)
          return tf.group(update_beta1, update_beta2)
        maybe_update_beta = tf.cond(
            tf.equal(iter_, 0), update_beta_op, tf.no_op)
        with tf.control_dependencies([maybe_update_beta]):
          update_iter = iter_.assign(tf.mod(iter_ + 1, self._n_t),
                                     use_locking=self._use_locking)
    return tf.group(
        *update_ops + [update_iter, maybe_update_beta], name=name_scope) 
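A plain-Python sketch of the counter logic (an illustration of the apparent intent, not the optimizer itself): with n accumulation steps, the beta-power update fires once per n batches while the counter cycles modulo n.

n = 3
iter_ = 0
for step in range(7):
  update_betas = (iter_ == 0)   # mirrors tf.cond(tf.equal(iter_, 0), update_beta_op, tf.no_op)
  iter_ = (iter_ + 1) % n       # mirrors iter_.assign(tf.mod(iter_ + 1, self._n_t))
  print(step, update_betas, iter_)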
Example #7
Source File: discretization.py    From training_results_v0.5 with Apache License 2.0
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
Example #8
Source File: transformer_nat.py    From BERT with Apache License 2.0
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)

  return means, ema_means, ema_count 
Example #9
Source File: multistep_optimizer.py    From BERT with Apache License 2.0
def _finish(self, update_ops, name_scope):
    """Updates beta_power variables every n batches and incrs counter."""
    iter_ = self._get_iter_variable()
    beta1_power, beta2_power = self._get_beta_accumulators()
    with tf.control_dependencies(update_ops):
      with tf.colocate_with(iter_):

        def update_beta_op():
          update_beta1 = beta1_power.assign(
              beta1_power * self._beta1_t,
              use_locking=self._use_locking)
          update_beta2 = beta2_power.assign(
              beta2_power * self._beta2_t,
              use_locking=self._use_locking)
          return tf.group(update_beta1, update_beta2)
        maybe_update_beta = tf.cond(
            tf.equal(iter_, 0), update_beta_op, tf.no_op)
        with tf.control_dependencies([maybe_update_beta]):
          update_iter = iter_.assign(tf.mod(iter_ + 1, self._n_t),
                                     use_locking=self._use_locking)
    return tf.group(
        *update_ops + [update_iter, maybe_update_beta], name=name_scope) 
Example #10
Source File: native_module.py    From hub with Apache License 2.0
def find_state_op_colocation_error(graph, reported_tags=None):
  """Returns error message for colocation of state ops, or None if ok."""
  state_op_types = list_registered_stateful_ops_without_inputs(
      graph.as_graph_def())
  state_op_map = {op.name: op for op in graph.get_operations()
                  if op.type in state_op_types}
  for op in state_op_map.values():
    for colocation_group in op.colocation_groups():
      if not (colocation_group.startswith(tf.compat.as_bytes("loc:@")) and
              tf.compat.as_str_any(colocation_group[5:]) in state_op_map):
        tags_prefix = ("" if reported_tags is None else
                       "in the graph for tags %s, " % reported_tags)
        return (
            "A state-holding node x of a module's graph (e.g., a Variable op) "
            "must not be subject to a tf.colocate_with(y) constraint "
            "unless y is also a state-holding node.\n"
            "Details: %snode '%s' has op '%s', which counts as state-holding, "
            "but Operation.colocation_groups() == %s. " %
            (tags_prefix, op.name, op.type, op.colocation_groups()))
  return None 
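A hypothetical graph that this check would reject: a variable (state-holding) created under a colocation constraint that points at a plain Add op (not state-holding). Sketch only, TF 1.x graph mode:

import tensorflow as tf

g = tf.Graph()
with g.as_default():
  a = tf.placeholder(tf.float32, name="a")
  total = tf.add(a, 1.0, name="total")
  with tf.colocate_with(total):
    v = tf.get_variable("v", shape=[], initializer=tf.zeros_initializer())
  print(v.op.colocation_groups())          # [b'loc:@total'] -> 'total' is not a state op
  print(find_state_op_colocation_error(g))  # -> returns the error message above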
Example #11
Source File: discretization.py    From fine-lm with MIT License
def get_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[bottleneck_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())

    ema_count = tf.get_variable(
        name="ema_count",
        shape=[bottleneck_size],
        initializer=tf.constant_initializer(0),
        trainable=False)

    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)

  return means, ema_means, ema_count 
Example #12
Source File: transformer_nat.py    From fine-lm with MIT License
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)

  return means, ema_means, ema_count 
Example #13
Source File: multistep_optimizer.py    From fine-lm with MIT License
def _finish(self, update_ops, name_scope):
    """Updates beta_power variables every n batches and incrs counter."""
    iter_ = self._get_iter_variable()
    beta1_power, beta2_power = self._get_beta_accumulators()
    with tf.control_dependencies(update_ops):
      with tf.colocate_with(iter_):

        def update_beta_op():
          update_beta1 = beta1_power.assign(
              beta1_power * self._beta1_t,
              use_locking=self._use_locking)
          update_beta2 = beta2_power.assign(
              beta2_power * self._beta2_t,
              use_locking=self._use_locking)
          return tf.group(update_beta1, update_beta2)
        maybe_update_beta = tf.cond(
            tf.equal(iter_, 0), update_beta_op, tf.no_op)
        with tf.control_dependencies([maybe_update_beta]):
          update_iter = iter_.assign(tf.mod(iter_ + 1, self._n_t),
                                     use_locking=self._use_locking)
    return tf.group(
        *update_ops + [update_iter, maybe_update_beta], name=name_scope) 
Example #14
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def _apply_to_all_device_tensors(all_device_tensors, apply_func, colocate=True):
  """Applies a function to each tensor in `all_device_tensors`.

  A new list of lists of tensors is returned, where every tensor in
  `all_device_tensors` has had `apply_func` called on it. `all_device_tensors`
  is not modified.

  Args:
    all_device_tensors: A list of list of tensors. `all_device_tensors[i][j]` is
      a tensor where `i` is the device index and `j` is the tensor index.
    apply_func: A function taking in three arguments: tensor, device_index,
      tensor_index, and returning a modified tensor.
      `tensor` is `all_device_tensors[device_index][tensor_index]`.
    colocate: If True, apply_func will be run under a context manager colocated
      with its input tensor.
  Returns:
    A list in the same form as `all_device_tensors`, except each tensor has had
    `apply_func` called on it.
  """
  new_all_device_tensors = []
  for device_index, device_tensors in enumerate(all_device_tensors):
    new_device_tensors = []
    for tensor_index, t in enumerate(device_tensors):
      if colocate:
        with tf.colocate_with(t):
          new_t = apply_func(t, device_index, tensor_index)
      else:
        new_t = apply_func(t, device_index, tensor_index)
      new_device_tensors.append(new_t)
    new_all_device_tensors.append(new_device_tensors)
  return new_all_device_tensors 
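A hedged usage sketch of this helper: compacting every tensor to float16. The nested [device][tensor] layout follows the docstring; the tensors themselves are made up.

import tensorflow as tf

all_device_tensors = [
    [tf.ones([4], tf.float32), tf.ones([2, 2], tf.float32)],  # "device 0"
    [tf.ones([4], tf.float32), tf.ones([2, 2], tf.float32)],  # "device 1"
]

def to_fp16(tensor, device_index, tensor_index):
  return tf.cast(tensor, tf.float16)

compact = _apply_to_all_device_tensors(all_device_tensors, to_fp16)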
Example #15
Source File: multistep_optimizer.py    From training_results_v0.5 with Apache License 2.0
def _create_slots(self, var_list):
    """Create slot variables for Adam with accumulated gradients."""
    super(MultistepAdamOptimizer, self)._create_slots(var_list)
    first_var = min(var_list, key=lambda x: x.name)
    self._create_non_slot_variable(initial_value=0 if self._n == 1 else 1,
                                   name="iter",
                                   colocate_with=first_var)
    for v in var_list:
      self._zeros_slot(v, "grad_acc", self._name) 
Example #16
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def split_all_device_tensors(self, all_device_tensors):
    """Splits concatenated tensors into `num_splits` pieces.

    `num_splits` is specified in the constructor.  In the case where the total
    size of a concatenated tensor is not divisible by `num_splits`, the last
    split tensor gets more elements.

    Args:
      all_device_tensors: A list of list of tensors. `all_device_tensors[i][j]`
        is a tensor where `i` is the device index and `j` is the tensor index.
        For each i, `all_device_tensors[i]` must be a list of length 1 of a
        single concatenated tensor.

    Returns:
      A list of list of tensors in a similar form as all_device_tensors, except
      the concatenated tensor on each device has been split. Each inner list
      is a list of length `num_splits`.
    """
    assert self._next_method == 'split'
    new_all_device_tensors = []
    for [concat_tensor] in all_device_tensors:
      with tf.colocate_with(concat_tensor):
        new_all_device_tensors.append(self._split_tensors(concat_tensor))
    self._orig_concat_all_device_tensors = all_device_tensors
    self._next_method = 'undo_split'
    return new_all_device_tensors 
Example #17
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def undo_split_all_device_tensors(self, all_device_tensors):
    """Undoes the effects of `split_all_device_tensors`."""
    assert self._next_method == 'undo_split'
    new_all_device_tensors = []
    for i, device_tensors in enumerate(all_device_tensors):
      [orig_tensor] = self._orig_concat_all_device_tensors[i]
      with tf.colocate_with(orig_tensor):
        new_all_device_tensors.append(
            [self._undo_split_tensors(device_tensors)])
    self._next_method = 'undo_concat'
    return new_all_device_tensors 
Example #18
Source File: vq_discrete.py    From training_results_v0.5 with Apache License 2.0
def __init__(self, hparams):
    self.hparams = hparams
    print ("self.hparams.z_size", self.hparams.z_size)
    # Set the discretization bottleneck specific things here
    self.hparams.z_size_per_residual = self.hparams.z_size // \
                                       self.hparams.num_residuals
    print ("self.hparams.num_residuals", self.hparams.num_residuals)
    self.hparams.block_dim = int(
        self.hparams.hidden_size // self.hparams.num_blocks)
    self.hparams.block_v_size = 2**(
        self.hparams.z_size_per_residual / self.hparams.num_blocks)
    self.hparams.block_v_size = int(self.hparams.block_v_size)
    self.means = tf.get_variable(
        name="means",
        shape=[
            self.hparams.num_blocks, self.hparams.block_v_size,
            self.hparams.block_dim
        ],
        initializer=tf.initializers.variance_scaling(distribution="uniform"))

    # Create the shadow variables if we are using EMA
    if self.hparams.ema:
      self.ema_count = tf.get_variable(
          "ema_count", [self.hparams.num_blocks, self.hparams.block_v_size],
          initializer=tf.constant_initializer(0),
          trainable=False)
      with tf.colocate_with(self.means):
        self.ema_means = tf.get_variable(
            "ema_means",
            initializer=self.means.initialized_value(),
            trainable=False) 
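A small worked example (plain Python, made-up hparams) of the bottleneck sizing arithmetic above:

z_size, num_residuals, num_blocks, hidden_size = 16, 2, 2, 512
z_size_per_residual = z_size // num_residuals                # 8 bits per residual
block_dim = hidden_size // num_blocks                        # 256 dimensions per block
block_v_size = int(2 ** (z_size_per_residual / num_blocks))  # 2**4 = 16 codes per block
print(z_size_per_residual, block_dim, block_v_size)          # 8 256 16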
Example #19
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def undo_concat_all_device_tensors(self, all_device_tensors):
    """Undoes the effects of `concat_all_device_tensors`."""
    assert self._next_method == 'undo_concat'
    new_all_device_tensors = []
    for [concat_tensor], tensor_state in zip(all_device_tensors,
                                             self._tensor_states):
      with tf.colocate_with(concat_tensor):
        new_all_device_tensors.append(self._undo_concat_tensors(concat_tensor,
                                                                tensor_state))
    self._next_method = None
    return new_all_device_tensors 
Example #20
Source File: variable_mgr.py    From training_results_v0.5 with Apache License 2.0
def preprocess_device_grads(self, device_grads):
    compact_grads = (self.benchmark_cnn.params.use_fp16 and
                     self.benchmark_cnn.params.compact_gradient_transfer)
    defer_grads = (self.benchmark_cnn.params.variable_consistency == 'relaxed')

    grads_to_reduce = [[g for g, _ in grad_vars] for grad_vars in device_grads]
    algorithm = batch_allreduce.algorithm_from_params(self.benchmark_cnn.params)
    reduced_grads, self._warmup_ops = algorithm.batch_all_reduce(
        grads_to_reduce, self.benchmark_cnn.params.gradient_repacking,
        compact_grads, defer_grads)
    assert not self._warmup_ops
    if (self.benchmark_cnn.params.use_fp16 and
        self.benchmark_cnn.enable_auto_loss_scale):
      # Check for infs or nans
      is_finite_list = []
      with tf.name_scope('check_for_inf_and_nan'):
        for tower_grads in reduced_grads:
          with tf.colocate_with(tower_grads[0]):
            # TODO(tanmingxing): Create fused op that takes in a list of tensors
            # as input and returns scalar boolean True if there are any
            # infs/nans.
            is_finite_list.append(tf.reduce_all(
                [tf.reduce_all(tf.is_finite(g)) for g in tower_grads]))
        self.grad_has_inf_nan = tf.logical_not(tf.reduce_all(is_finite_list))
    reduced_device_grads = [[
        (g, v) for g, (_, v) in zip(grads, grad_vars)
    ] for grads, grad_vars in zip(reduced_grads, device_grads)]
    return self.benchmark_cnn.devices, reduced_device_grads 
Example #21
Source File: batch_allreduce.py    From training_results_v0.5 with Apache License 2.0
def _undo_compact_all_device_tensors(all_device_tensors,
                                     orig_all_device_tensors):
  """Uncompacts each tensor by casting to it's original dtype."""
  def apply_func(tensor, device_index, tensor_index):
    orig_tensor = orig_all_device_tensors[device_index][tensor_index]
    with tf.colocate_with(orig_tensor):
      return tf.cast(tensor, orig_tensor.dtype)
  return _apply_to_all_device_tensors(all_device_tensors, apply_func,
                                      colocate=False) 
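A hedged round-trip sketch combining this with _apply_to_all_device_tensors from earlier: compact to float16, then restore the original dtypes. colocate=False in the undo step because apply_func already opens its own tf.colocate_with(orig_tensor) context.

import tensorflow as tf

orig = [[tf.ones([3], tf.float32)], [tf.ones([3], tf.float32)]]
compact = _apply_to_all_device_tensors(orig, lambda t, di, ti: tf.cast(t, tf.float16))
restored = _undo_compact_all_device_tensors(compact, orig)  # back to float32, colocated with orig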
Example #22
Source File: math_utils.py    From training_results_v0.5 with Apache License 2.0
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Custom version of tf.clip_by_global_norm that doesn't check numerics."""
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = tf.global_norm(t_list, name)

  with tf.name_scope(name, "clip_by_global_norm", t_list + [clip_norm]) as name:
    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    scale = clip_norm * tf.minimum(
        1.0 / use_norm,
        tf.constant(1.0, dtype=use_norm.dtype) / clip_norm)

    values = [
        tf.convert_to_tensor(
            t.values if isinstance(t, tf.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]

    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
      else:
        with tf.colocate_with(v):
          values_clipped.append(
              tf.identity(v * scale, name="%s_%d" % (name, i)))

    list_clipped = [
        tf.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, tf.IndexedSlices)
        else c_v
        for (c_v, t) in zip(values_clipped, t_list)]

  return list_clipped, use_norm 
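A hedged usage sketch (TF 1.x graph mode; collections and six are assumed to be imported alongside tensorflow in the excerpt's module):

import tensorflow as tf

grads = [tf.constant([3.0, 4.0]), tf.constant([[1.0, 2.0], [2.0, 1.0]])]
clipped, norm = clip_by_global_norm(grads, clip_norm=1.0)
# Each gradient is scaled by min(1, clip_norm / global_norm), and each scaling op is
# colocated with the gradient it rescales.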
Example #23
Source File: native_module.py    From hub with Apache License 2.0
def find_signature_input_colocation_error(signature_name, inputs):
  """Returns error message for colocation of signature inputs, or None if ok."""
  for input_name, tensor in inputs.items():
    expected_colocation_groups = [tf.compat.as_bytes("loc:@" + tensor.op.name)]
    if tensor.op.colocation_groups() != expected_colocation_groups:
      return (
          "A tensor x used as input in a signature must not be subject to a "
          "tf.colocate_with(y) constraint. (The reverse would be allowed.)\n"
          "Details: tensor '%s' appears as input '%s' of signature '%s' "
          "but has Tensor.op.colocation_groups() == %s" %
          (tensor, input_name, signature_name, tensor.op.colocation_groups()))
  return None 
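A hypothetical illustration of the condition being checked: a tensor whose op carries a colocation constraint pointing at some other op would be rejected as a signature input.

import tensorflow as tf

anchor = tf.constant([1.0], name="anchor")
with tf.colocate_with(anchor):
  bad_input = tf.constant([2.0], name="bad_input")
print(bad_input.op.colocation_groups())  # [b'loc:@anchor'], not [b'loc:@bad_input']
print(find_signature_input_colocation_error("sig", {"x": bad_input}))  # -> error message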
Example #24
Source File: curvature_matrix_vector_products.py    From kfac with Apache License 2.0
def _multiply_across_losses(self, mult_func, vecs, coeff_mode="regular"):
    products = []
    for loss, vec in zip(self._losses, vecs):
      with tf.colocate_with(self._loss_colocation_ops[loss]):
        if coeff_mode == "regular":
          multiplier = self._get_loss_coeff(loss)
        elif coeff_mode == "sqrt":
          multiplier = tf.sqrt(self._get_loss_coeff(loss))
        val = mult_func(loss, vec)
        products.append(tf.cast(multiplier, dtype=val.dtype) * val)
    return tuple(products)