Python tensorflow.compat.v1.get_variable() Examples
The following are 30 code examples of tensorflow.compat.v1.get_variable(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
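Before the examples, here is a minimal, self-contained sketch of the usage pattern most of them share: tf.compat.v1.get_variable() creates a named variable with a given shape and initializer, and returns the existing variable of that name when called again inside a tf.variable_scope that allows reuse. The scope and variable names below are illustrative placeholders, not taken from any of the listed projects.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # tf.Session and variable scopes need graph mode.

with tf.variable_scope("demo", reuse=tf.AUTO_REUSE):
  # Created on the first call; returned again (not re-created) on later calls
  # because the enclosing scope uses AUTO_REUSE.
  kernel = tf.get_variable(
      "kernel",
      shape=[3, 4],
      dtype=tf.float32,
      initializer=tf.zeros_initializer(),
      trainable=True)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(kernel).shape)  # (3, 4)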
Example #1
Source File: variable_mgr.py From benchmarks with Apache License 2.0 | 6 votes |
def get_gradients_to_apply(self, device_num, gradient_state):
  device_grads = gradient_state  # From 2nd result of preprocess_device_grads.
  avg_grads, self.grad_has_inf_nan = (
      variable_mgr_util.aggregate_gradients_using_copy_with_device_selection(
          self.benchmark_cnn,
          device_grads,
          use_mean=True,
          check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale))

  # Make shadow variable on a parameter server for each original trainable
  # variable.
  for i, (g, v) in enumerate(avg_grads):
    my_name = variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/' + v.name
    if my_name.endswith(':0'):
      my_name = my_name[:-2]
    new_v = tf.get_variable(
        my_name,
        dtype=v.dtype.base_dtype,
        initializer=v.initial_value,
        trainable=True)
    avg_grads[i] = (g, new_v)
  return avg_grads
Example #2
Source File: universal_transformer_util.py From tensor2tensor with Apache License 2.0 | 6 votes |
def add_depth_embedding(x):
  """Add n-dimensional embedding as the depth embedding (timing signal).

  Adds embeddings to represent the position of the step in the recurrent
  tower.

  Args:
    x: a tensor with shape [max_step, batch, length, depth]

  Returns:
    a Tensor the same shape as x.
  """
  x_shape = common_layers.shape_list(x)
  depth = x_shape[-1]
  num_steps = x_shape[0]
  shape = [num_steps, 1, 1, depth]
  depth_embedding = (
      tf.get_variable(
          "depth_embedding", shape,
          initializer=tf.random_normal_initializer(0, depth**-0.5)) *
      (depth**0.5))
  x += depth_embedding
  return x
Example #3
Source File: common_layers_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testSpectralNorm(self):
  # Test that after 20 calls to apply_spectral_norm, the spectral
  # norm of the normalized matrix is close to 1.0
  with tf.Graph().as_default():
    weights = tf.get_variable("w", dtype=tf.float32, shape=[2, 3, 50, 100])
    weights = tf.multiply(weights, 10.0)
    normed_weight, assign_op = common_layers.apply_spectral_norm(weights)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for _ in range(20):
        sess.run(assign_op)
        normed_weight, assign_op = common_layers.apply_spectral_norm(
            weights)
      normed_weight = sess.run(normed_weight).reshape(-1, 100)
      _, s, _ = np.linalg.svd(normed_weight)
      self.assertTrue(np.allclose(s[0], 1.0, rtol=0.1))
Example #4
Source File: transformer_nat.py From tensor2tensor with Apache License 2.0 | 6 votes |
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)
  return means, ema_means, ema_count
Example #5
Source File: glow_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def scale_gaussian_prior(name, z, logscale_factor=3.0, trainable=True):
  """Returns N(s^i * z^i, std^i) where s^i and std^i are per-component.

  s^i is a learnable parameter with identity initialization.
  std^i is optionally learnable with identity initialization.

  Args:
    name: variable scope.
    z: input tensor.
    logscale_factor: equivalent to scaling up the learning_rate by a factor
      of logscale_factor.
    trainable: Whether or not std^i is learnt.
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    z_shape = common_layers.shape_list(z)
    latent_multiplier = tf.get_variable(
        "latent_multiplier", shape=z_shape, dtype=tf.float32,
        initializer=tf.ones_initializer())
    log_scale = tf.get_variable(
        "log_scale_latent", shape=z_shape, dtype=tf.float32,
        initializer=tf.zeros_initializer(), trainable=trainable)
    log_scale = log_scale * logscale_factor
    return tfp.distributions.Normal(
        loc=latent_multiplier * z, scale=tf.exp(log_scale))
Example #6
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def zero_add(previous_value, x, name=None, reuse=None):
  """Resnet connection with zero initialization.

  Another type of resnet connection which returns previous_value + gamma * x.
  gamma is a trainable scalar and initialized with zero. It is useful when a
  module is plugged into a trained model and we want to make sure it matches
  the original model's performance.

  Args:
    previous_value: A tensor.
    x: A tensor.
    name: name of variable scope; defaults to zero_add.
    reuse: reuse scope.

  Returns:
    previous_value + gamma * x.
  """
  with tf.variable_scope(name, default_name="zero_add", reuse=reuse):
    gamma = tf.get_variable("gamma", (), initializer=tf.zeros_initializer())
    return previous_value + gamma * x
Example #7
Source File: transformer_glow_layers_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def dense_weightnorm(
    name, x, n_out, x_mask, init_scale, init, dtype=tf.float32):
  """Dense layer with weight normalization."""
  n_in = common_layers.shape_list(x)[2]
  eps = tf.keras.backend.epsilon()
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    v = tf.get_variable(
        "v", [n_in, n_out], dtype,
        initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
    v = v / tf.norm(v, axis=0, keepdims=True)
    t = tf.matmul(x, v)  # [B, L, n_out]
    mean, var = moments_over_bl(t, x_mask)
    g_init = init_scale / (tf.sqrt(var) + eps)
    g = get_variable_ddi(
        "g", [n_out], g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    b = get_variable_ddi(
        "b", [n_out], -mean * g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    w = g * v
    y = tf.matmul(x, w) + b
    tf.summary.histogram("_g", g)
    return y
Example #8
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
Example #9
Source File: nas_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _conv_function(self, input_tensor, output_depth, padding):
  input_depth = input_tensor.shape.as_list()[-1]
  if not ((output_depth >= input_depth) and
          (output_depth % input_depth == 0)):
    raise ValueError(
        "Depthwise layer output_depth (%s) must be greater or equal to and "
        "a multiple of the depth of the "
        "input tensor (%s)." % (output_depth, input_depth))
  channel_multiplier = calculate_depthwise_channel_multiplier(
      input_depth, output_depth)
  kernel = tf.get_variable(
      "kernel", [self._conv_width, 1, input_depth, channel_multiplier])
  return tf.nn.depthwise_conv2d(
      input_tensor,
      kernel, [1, 1, 1, 1],
      padding=padding,
      name="depthwise_conv_%sx1" % str(self._conv_width))
Example #10
Source File: discretization.py From tensor2tensor with Apache License 2.0 | 6 votes |
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())
    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)
    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)
    return means, ema_means, ema_count
Example #11
Source File: tiled_linear.py From lamb with Apache License 2.0 | 6 votes |
def _build_tiled_linear(self, inputs, input_name_and_sizes,
                        output_name_and_sizes, add_bias):
  results = []
  for output_name, output_size in output_name_and_sizes:
    r = 0.0
    for input_, (input_name, input_size) in zip(inputs, input_name_and_sizes):
      name = 'W_{}_{}'.format(input_name, output_name)
      weight = self._get_variable(name, shape=[output_size, input_size])
      r += tf.sparse_tensor_dense_matmul(weight, input_, adjoint_b=True)
    r = tf.transpose(r)
    if add_bias:
      # Biases are dense, hence we call _get_variable of the base class.
      r += super(SparseTiledLinear, self)._get_variable(
          'B_{}'.format(output_name), shape=[output_size],
          default_initializer=tf.zeros_initializer())
    results.append(r)
  return results

# TODO(melisgl): Since computation is the same as in TiledLinear,
# perhaps this should be implemented as a custom getter (see
# tf.get_variable) instead of being tied to tiling.
Example #12
Source File: common_image_attention.py From tensor2tensor with Apache License 2.0 | 6 votes |
def get_channel_embeddings(io_depth, targets, hidden_size, name="channel"):
  """Get separate embedding for each of the channels."""
  targets_split = tf.split(targets, io_depth, axis=3)
  rgb_embedding_var = tf.get_variable("rgb_target_emb_%s" % name,
                                      [256 * io_depth, hidden_size])
  rgb_embedding_var = tf.identity(rgb_embedding_var)
  rgb_embedding_var *= float(hidden_size)**0.5
  channel_target_embs = []
  for i in range(io_depth):
    # Adding the channel offsets to get the right embedding since the
    # embedding tensor has shape [256 * io_depth, hidden_size].
    target_ids = tf.squeeze(targets_split[i], axis=3) + i * 256
    target_embs = common_layers.gather(rgb_embedding_var, target_ids)
    channel_target_embs.append(target_embs)
  return tf.concat(channel_target_embs, axis=-1)
Example #13
Source File: tpu_util.py From morph-net with Apache License 2.0 | 6 votes |
def write_to_variable(tensor, fail_if_exists=True):
  """Saves a tensor for later retrieval on CPU."""
  # Only relevant for debugging.
  debug_name = 'tpu_util__' + tensor.name.split(':')[0]

  reuse = False if fail_if_exists else tf.compat.v1.AUTO_REUSE
  with tf.variable_scope(top_level_scope, reuse=reuse):
    variable = tf.get_variable(
        name=debug_name,
        shape=tensor.shape,
        dtype=tensor.dtype,
        trainable=False,
        use_resource=True)

  var_store[tensor] = variable
  with tf.control_dependencies([variable.assign(tensor)]):
    tensor_copy = tf.identity(tensor)
  var_store[tensor_copy] = variable
  return tensor_copy
Example #14
Source File: batch_norm_source_op_handler_test.py From morph-net with Apache License 2.0 | 6 votes |
def testCreateRegularizer_Sliced(self):
  # Call handler to create regularizer.
  handler = batch_norm_source_op_handler.BatchNormSourceOpHandler(
      _GAMMA_THRESHOLD)
  batch_norm_op_slice = orm.OpSlice(self.batch_norm_op, orm.Slice(0, 3))
  regularizer = handler.create_regularizer(batch_norm_op_slice)

  # Verify regularizer is the gamma tensor.
  with self.cached_session():
    # Initialize the gamma tensor to check value equality.
    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
      gamma_tensor = tf.get_variable('conv1/BatchNorm/gamma')
    init = tf.variables_initializer([gamma_tensor])
    init.run()

    # Verify regularizer is the sliced gamma tensor.
    self.assertAllEqual(gamma_tensor.eval()[0:3],
                        regularizer._gamma.eval())
Example #15
Source File: utils.py From lamb with Apache License 2.0 | 6 votes |
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """DOC."""
  reduction_indices = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, reduction_indices, keep_dims=True)
  variance = tf.reduce_mean(tf.squared_difference(x, mean),
                            reduction_indices, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  dtype = x.dtype
  shape = x.get_shape().as_list()
  for i in six.moves.range(len(shape)):
    if i not in reduction_indices or not per_element:
      shape[i] = 1
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                             initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                             initializer=tf.zeros_initializer())
  return gain * normalized + bias
Example #16
Source File: evaluator.py From graphics with Apache License 2.0 | 6 votes |
def _init_graph(self):
  """Initialize computation graph for tensorflow."""
  with self.graph.as_default():
    self.refiner = im.ImNet(dim=self.dim,
                            in_features=self.codelen,
                            out_features=self.out_features,
                            num_filters=self.num_filters)
    self.global_step = tf.get_variable('global_step', shape=[],
                                       dtype=tf.int64)

    self.pts_ph = tf.placeholder(tf.float32, shape=[self.point_batch, 3])
    self.lat_ph = tf.placeholder(tf.float32, shape=[self.codelen])

    lat = tf.broadcast_to(self.lat_ph[tf.newaxis],
                          [self.point_batch, self.codelen])
    code = tf.concat((self.pts_ph, lat), axis=-1)  # [pb, 3+c]

    vals = self.refiner(code, training=False)  # [pb, 1]
    self.vals = tf.squeeze(vals, axis=1)  # [pb]
    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.saver.restore(self.sess, self.ckpt)
Example #17
Source File: evaluator.py From graphics with Apache License 2.0 | 6 votes |
def _init_graph(self):
  """Initialize computation graph for tensorflow."""
  with self.graph.as_default():
    self.encoder = g2v.GridEncoder(
        in_grid_res=self.in_grid_res,
        num_filters=self.num_filters,
        codelen=self.codelen,
        name='g2v')
    self.global_step = tf.get_variable(
        'global_step', shape=[], dtype=tf.int64)
    self.grid_ph = tf.placeholder(
        tf.float32, shape=[self.gres, self.gres, self.gres])
    self.start_ph = tf.placeholder(tf.int32, shape=[self.grid_batch, 3])
    self.ingrid = self._batch_slice(self.grid_ph, self.start_ph,
                                    self.in_grid_res, self.grid_batch)
    self.ingrid = self.ingrid[..., tf.newaxis]
    self.lats = self.encoder(self.ingrid, training=False)  # [gb, codelen]
    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.saver.restore(self.sess, self.ckpt)
Example #18
Source File: averaged.py From lamb with Apache License 2.0 | 6 votes |
def __init__(self, tensors):
  tensors = list(tensors)
  with tf.variable_scope('averaged'):
    self._num_samples = tf.Variable(0, name='num_samples', trainable=False)
    with tf.variable_scope('avg'):
      self._averages = [
          tf.get_variable(
              tensor.name.replace('/', '-').replace(':', '-'),
              tensor.get_shape(), initializer=tf.zeros_initializer(),
              trainable=False)
          for tensor in tensors]
    with tf.variable_scope('save'):
      self._saves = [
          tf.get_variable(
              tensor.name.replace('/', '-').replace(':', '-'),
              tensor.get_shape(), initializer=tf.zeros_initializer(),
              trainable=False)
          for tensor in tensors]
  self._tensors = tensors
  self._take_sample = self._make_take_sample()
  self._switch = self._make_swith_to_average()
  self._restore = self._make_restore()
  self._reset = self._make_reset()
Example #19
Source File: optimization_test.py From albert with Apache License 2.0 | 6 votes |
def test_adam(self):
  with self.test_session() as sess:
    w = tf.get_variable(
        "w",
        shape=[3],
        initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
    x = tf.constant([0.4, 0.2, -0.5])
    loss = tf.reduce_mean(tf.square(x - w))
    tvars = tf.trainable_variables()
    grads = tf.gradients(loss, tvars)
    global_step = tf.train.get_or_create_global_step()
    optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
    train_op = optimizer.apply_gradients(list(zip(grads, tvars)), global_step)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    for _ in range(100):
      sess.run(train_op)
    w_np = sess.run(w)
    self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
Example #20
Source File: export_checkpoints.py From albert with Apache License 2.0 | 6 votes |
def get_mlm_logits(input_tensor, albert_config, mlm_positions, output_weights):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(input_tensor, mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits
Example #21
Source File: convnet_builder.py From benchmarks with Apache License 2.0 | 6 votes |
def _conv2d_impl(self, input_layer, num_channels_in, filters, kernel_size,
                 strides, padding, kernel_initializer):
  if self.use_tf_layers:
    return conv_layers.conv2d(input_layer, filters, kernel_size, strides,
                              padding, self.channel_pos,
                              kernel_initializer=kernel_initializer,
                              use_bias=False)
  else:
    weights_shape = [kernel_size[0], kernel_size[1], num_channels_in, filters]
    # We use the name 'conv2d/kernel' so the variable has the same name as
    # its tf.layers equivalent. This way, if a checkpoint is written when
    # self.use_tf_layers == True, it can be loaded when
    # self.use_tf_layers == False, and vice versa.
    weights = self.get_variable('conv2d/kernel', weights_shape,
                                self.variable_dtype, self.dtype,
                                initializer=kernel_initializer)
    if self.data_format == 'NHWC':
      strides = [1] + strides + [1]
    else:
      strides = [1, 1] + strides
    return tf.nn.conv2d(input_layer, weights, strides, padding,
                        data_format=self.data_format)
Example #22
Source File: sv2p.py From tensor2tensor with Apache License 2.0 | 6 votes |
def init_internal_states(self):
  # Hardcoded LSTM-CONV shapes.
  # These sizes are calculated based on original atari frames.
  # TODO(mbz): find a cleaner way of doing this maybe?!
  batch_size = self.hparams.batch_size
  shapes = [(batch_size, 53, 40, 8),
            (batch_size, 53, 40, 8),
            (batch_size, 27, 20, 16),
            (batch_size, 27, 20, 16),
            (batch_size, 53, 40, 8)]

  with tf.variable_scope("clean_scope"):
    # Initialize conv-lstm states with zeros
    init = tf.zeros_initializer()
    states = []
    for i, shape in enumerate(shapes):
      # every lstm-conv state has two variables named c and h.
      c = tf.get_variable("c%d" % i, shape, trainable=False, initializer=init)
      h = tf.get_variable("h%d" % i, shape, trainable=False, initializer=init)
      states.append((c, h))
    return states
Example #23
Source File: utils.py From gpt2-ml with Apache License 2.0 | 5 votes |
def layer_norm(input_tensor, name=None, epsilon=1e-5):
  """Run layer normalization on the last dimension of the tensor."""
  name2use = f'LayerNorm_{name}' if name is not None else name
  with tf.variable_scope(name2use, default_name='LayerNorm'):
    dim = input_tensor.shape[-1].value
    gamma = tf.get_variable('gamma', [dim],
                            initializer=tf.constant_initializer(1))
    beta = tf.get_variable('beta', [dim],
                           initializer=tf.constant_initializer(0))
    mean = tf.reduce_mean(input_tensor, axis=-1, keepdims=True)
    std = tf.reduce_mean(tf.square(input_tensor - mean),
                         axis=-1, keepdims=True)
    input_tensor = (input_tensor - mean) * tf.rsqrt(std + epsilon)
    input_tensor = input_tensor * gamma + beta
  return input_tensor
Example #24
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def embedding(x,
              vocab_size,
              dense_size,
              name=None,
              reuse=None,
              multiplier=1.0,
              symbol_dropout_rate=0.0,
              embedding_var=None,
              dtype=tf.float32):
  """Embed x of type int64 into dense vectors, reducing to max 4 dimensions."""
  with tf.variable_scope(
      name, default_name="embedding", values=[x], reuse=reuse, dtype=dtype):
    if embedding_var is None:
      embedding_var = tf.get_variable("kernel", [vocab_size, dense_size])
    # On the backwards pass, we want to convert the gradient from
    # an indexed-slices to a regular tensor before sending it back to the
    # parameter server. This avoids excess computation on the parameter
    # server.
    if not tf.executing_eagerly():
      embedding_var = convert_gradient_to_tensor(embedding_var)
    x = dropout_no_scaling(x, 1.0 - symbol_dropout_rate)
    emb_x = gather(embedding_var, x, dtype)
    if multiplier != 1.0:
      emb_x *= multiplier
    static_shape = emb_x.shape.as_list()
    if len(static_shape) < 5:
      return emb_x
    assert len(static_shape) == 5
    # If we had an extra channel dimension, assume it's 1, i.e. shape[3] == 1.
    return tf.squeeze(emb_x, 3)
Example #25
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def nalu(x, depth, epsilon=1e-30, name=None, reuse=None):
  """NALU as in https://arxiv.org/abs/1808.00508."""
  with tf.variable_scope(name, default_name="nalu", values=[x], reuse=reuse):
    x_shape = shape_list(x)
    x_flat = tf.reshape(x, [-1, x_shape[-1]])
    gw = tf.get_variable("w", [x_shape[-1], depth])
    g = tf.nn.sigmoid(tf.matmul(x_flat, gw))
    g = tf.reshape(g, x_shape[:-1] + [depth])
    a = nac(x, depth, name="nac_lin")
    log_x = tf.log(tf.abs(x) + epsilon)
    m = nac(log_x, depth, name="nac_log")
    return g * a + (1 - g) * tf.exp(m)
Example #26
Source File: vq_discrete.py From tensor2tensor with Apache License 2.0 | 5 votes |
def __init__(self, hparams):
  self.hparams = hparams
  print("self.hparams.z_size", self.hparams.z_size)
  # Set the discretization bottleneck specific things here
  self.hparams.z_size_per_residual = (self.hparams.z_size //
                                      self.hparams.num_residuals)
  print("self.hparams.num_residuals", self.hparams.num_residuals)
  self.hparams.block_dim = int(
      self.hparams.hidden_size // self.hparams.num_blocks)
  self.hparams.block_v_size = 2**(
      self.hparams.z_size_per_residual / self.hparams.num_blocks)
  self.hparams.block_v_size = int(self.hparams.block_v_size)
  self.means = tf.get_variable(
      name="means",
      shape=[
          self.hparams.num_blocks, self.hparams.block_v_size,
          self.hparams.block_dim
      ],
      initializer=tf.initializers.variance_scaling(distribution="uniform"))

  # Create the shadow variables if we are using EMA
  if self.hparams.ema:
    self.ema_count = tf.get_variable(
        "ema_count", [self.hparams.num_blocks, self.hparams.block_v_size],
        initializer=tf.constant_initializer(0),
        trainable=False)
    with tf.colocate_with(self.means):
      self.ema_means = tf.get_variable(
          "ema_means",
          initializer=self.means.initialized_value(),
          trainable=False)
Example #27
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def nac(x, depth, name=None, reuse=None):
  """NAC as in https://arxiv.org/abs/1808.00508."""
  with tf.variable_scope(name, default_name="nac", values=[x], reuse=reuse):
    x_shape = shape_list(x)
    w = tf.get_variable("w", [x_shape[-1], depth])
    m = tf.get_variable("m", [x_shape[-1], depth])
    w = tf.tanh(w) * tf.nn.sigmoid(m)
    x_flat = tf.reshape(x, [-1, x_shape[-1]])
    res_flat = tf.matmul(x_flat, w)
    return tf.reshape(res_flat, x_shape[:-1] + [depth])
Example #28
Source File: modalities.py From tensor2tensor with Apache License 2.0 | 5 votes |
def get_weights(model_hparams, vocab_size, hidden_dim=None):
  """Create or get concatenated embedding or softmax variable.

  Args:
    model_hparams: HParams, model hyperparameters.
    vocab_size: int, vocabulary size.
    hidden_dim: dim of the variable. Defaults to model_hparams' hidden_size.

  Returns:
    a list of num_shards Tensors.
  """
  if hidden_dim is None:
    hidden_dim = model_hparams.hidden_size
  num_shards = model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (vocab_size // num_shards) + (
        1 if i < vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    shards.append(
        tf.get_variable(
            var_name, [shard_size, hidden_dim],
            initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5)))
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  # Convert ret to tensor.
  if not tf.executing_eagerly():
    ret = common_layers.convert_gradient_to_tensor(ret)
  return ret
Example #29
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def l2_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None):
  """Layer normalization with l2 norm."""
  if filters is None:
    filters = shape_list(x)[-1]
  with tf.variable_scope(name, default_name="l2_norm", values=[x], reuse=reuse):
    scale = tf.get_variable(
        "l2_norm_scale", [filters], initializer=tf.ones_initializer())
    bias = tf.get_variable(
        "l2_norm_bias", [filters], initializer=tf.zeros_initializer())
    epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
    mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    l2norm = tf.reduce_sum(
        tf.squared_difference(x, mean), axis=[-1], keepdims=True)
    norm_x = (x - mean) * tf.rsqrt(l2norm + epsilon)
    return norm_x * scale + bias
Example #30
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def layer_norm_vars(filters):
  """Create Variables for layer norm."""
  scale = tf.get_variable(
      "layer_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "layer_norm_bias", [filters], initializer=tf.zeros_initializer())
  return scale, bias