Python tensorflow.compat.v1.get_variable() Examples
The following are 30 code examples of tensorflow.compat.v1.get_variable(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
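Before the examples, here is a minimal, self-contained sketch of the usage pattern most of them share: tf.compat.v1.get_variable() creates a named variable with a given shape and initializer, and returns the existing variable of that name when called again inside a tf.variable_scope that allows reuse. The scope and variable names below are illustrative placeholders, not taken from any of the listed projects.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # tf.Session and variable scopes need graph mode.

with tf.variable_scope("demo", reuse=tf.AUTO_REUSE):
  # Created on the first call; returned again (not re-created) on later calls
  # because the enclosing scope uses AUTO_REUSE.
  kernel = tf.get_variable(
      "kernel",
      shape=[3, 4],
      dtype=tf.float32,
      initializer=tf.zeros_initializer(),
      trainable=True)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(kernel).shape)  # (3, 4)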
Example #1
Source File: variable_mgr.py From benchmarks with Apache License 2.0 | 6 votes |
def get_gradients_to_apply(self, device_num, gradient_state):
  device_grads = gradient_state  # From 2nd result of preprocess_device_grads.
  avg_grads, self.grad_has_inf_nan = (
      variable_mgr_util.aggregate_gradients_using_copy_with_device_selection(
          self.benchmark_cnn,
          device_grads,
          use_mean=True,
          check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale))

  # Make shadow variable on a parameter server for each original trainable
  # variable.
  for i, (g, v) in enumerate(avg_grads):
    my_name = variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/' + v.name
    if my_name.endswith(':0'):
      my_name = my_name[:-2]
    new_v = tf.get_variable(
        my_name,
        dtype=v.dtype.base_dtype,
        initializer=v.initial_value,
        trainable=True)
    avg_grads[i] = (g, new_v)
  return avg_grads
Example #2
Source File: universal_transformer_util.py From tensor2tensor with Apache License 2.0 | 6 votes |
def add_depth_embedding(x):
  """Add n-dimensional embedding as the depth embedding (timing signal).

  Adds embeddings to represent the position of the step in the recurrent
  tower.

  Args:
    x: a tensor with shape [max_step, batch, length, depth]

  Returns:
    a Tensor the same shape as x.
  """
  x_shape = common_layers.shape_list(x)
  depth = x_shape[-1]
  num_steps = x_shape[0]
  shape = [num_steps, 1, 1, depth]
  depth_embedding = (
      tf.get_variable(
          "depth_embedding", shape,
          initializer=tf.random_normal_initializer(0, depth**-0.5)) *
      (depth**0.5))
  x += depth_embedding
  return x
Example #3
Source File: common_layers_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testSpectralNorm(self):
  # Test that after 20 calls to apply_spectral_norm, the spectral
  # norm of the normalized matrix is close to 1.0
  with tf.Graph().as_default():
    weights = tf.get_variable("w", dtype=tf.float32, shape=[2, 3, 50, 100])
    weights = tf.multiply(weights, 10.0)
    normed_weight, assign_op = common_layers.apply_spectral_norm(weights)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for _ in range(20):
        sess.run(assign_op)
        normed_weight, assign_op = common_layers.apply_spectral_norm(
            weights)
      normed_weight = sess.run(normed_weight).reshape(-1, 100)
      _, s, _ = np.linalg.svd(normed_weight)
      self.assertTrue(np.allclose(s[0], 1.0, rtol=0.1))
Example #4
Source File: transformer_nat.py From tensor2tensor with Apache License 2.0 | 6 votes |
def init_vq_bottleneck(bottleneck_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  means = tf.get_variable(
      name="means",
      shape=[bottleneck_size, hidden_size],
      initializer=tf.uniform_unit_scaling_initializer())
  ema_count = tf.get_variable(
      name="ema_count",
      shape=[bottleneck_size],
      initializer=tf.constant_initializer(0),
      trainable=False)
  with tf.colocate_with(means):
    ema_means = tf.get_variable(
        name="ema_means",
        initializer=means.initialized_value(),
        trainable=False)
  return means, ema_means, ema_count
Example #5
Source File: glow_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def scale_gaussian_prior(name, z, logscale_factor=3.0, trainable=True):
  """Returns N(s^i * z^i, std^i) where s^i and std^i are per-component.

  s^i is a learnable parameter with identity initialization.
  std^i is optionally learnable with identity initialization.

  Args:
    name: variable scope.
    z: input tensor.
    logscale_factor: equivalent to scaling up the learning_rate by a factor
      of logscale_factor.
    trainable: Whether or not std^i is learnt.
  """
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    z_shape = common_layers.shape_list(z)
    latent_multiplier = tf.get_variable(
        "latent_multiplier", shape=z_shape, dtype=tf.float32,
        initializer=tf.ones_initializer())
    log_scale = tf.get_variable(
        "log_scale_latent", shape=z_shape, dtype=tf.float32,
        initializer=tf.zeros_initializer(), trainable=trainable)
    log_scale = log_scale * logscale_factor
    return tfp.distributions.Normal(
        loc=latent_multiplier * z, scale=tf.exp(log_scale))
Example #6
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def zero_add(previous_value, x, name=None, reuse=None):
  """Resnet connection with zero initialization.

  Another type of resnet connection which returns previous_value + gamma * x.
  gamma is a trainable scalar and initialized with zero. It is useful when a
  module is plugged into a trained model and we want to make sure it matches
  the original model's performance.

  Args:
    previous_value: A tensor.
    x: A tensor.
    name: name of variable scope; defaults to zero_add.
    reuse: reuse scope.

  Returns:
    previous_value + gamma * x.
  """
  with tf.variable_scope(name, default_name="zero_add", reuse=reuse):
    gamma = tf.get_variable("gamma", (), initializer=tf.zeros_initializer())
    return previous_value + gamma * x
Example #7
Source File: transformer_glow_layers_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def dense_weightnorm(
    name, x, n_out, x_mask, init_scale, init, dtype=tf.float32):
  """Dense layer with weight normalization."""
  n_in = common_layers.shape_list(x)[2]
  eps = tf.keras.backend.epsilon()
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    v = tf.get_variable(
        "v", [n_in, n_out], dtype,
        initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
    v = v / tf.norm(v, axis=0, keepdims=True)
    t = tf.matmul(x, v)  # [B, L, n_out]
    mean, var = moments_over_bl(t, x_mask)
    g_init = init_scale / (tf.sqrt(var) + eps)
    g = get_variable_ddi(
        "g", [n_out], g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    b = get_variable_ddi(
        "b", [n_out], -mean * g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    w = g * v
    y = tf.matmul(x, w) + b
    tf.summary.histogram("_g", g)
    return y
Example #8
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def group_norm(x, filters=None, num_groups=8, epsilon=1e-5):
  """Group normalization as in https://arxiv.org/abs/1803.08494."""
  x_shape = shape_list(x)
  if filters is None:
    filters = x_shape[-1]
  assert len(x_shape) == 4
  assert filters % num_groups == 0
  # Prepare variables.
  scale = tf.get_variable(
      "group_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "group_norm_bias", [filters], initializer=tf.zeros_initializer())
  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  # Reshape and compute group norm.
  x = tf.reshape(x, x_shape[:-1] + [num_groups, filters // num_groups])
  # Calculate mean and variance on heights, width, channels (not groups).
  mean, variance = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
  return tf.reshape(norm_x, x_shape) * scale + bias
Example #9
Source File: nas_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _conv_function(self, input_tensor, output_depth, padding):
  input_depth = input_tensor.shape.as_list()[-1]
  if not ((output_depth >= input_depth) and
          (output_depth % input_depth == 0)):
    raise ValueError(
        "Depthwise layer output_depth (%s) must be greater or equal to and "
        "a multiple of the depth of the "
        "input tensor (%s)." % (output_depth, input_depth))
  channel_multiplier = calculate_depthwise_channel_multiplier(
      input_depth, output_depth)
  kernel = tf.get_variable(
      "kernel", [self._conv_width, 1, input_depth, channel_multiplier])
  return tf.nn.depthwise_conv2d(
      input_tensor,
      kernel, [1, 1, 1, 1],
      padding=padding,
      name="depthwise_conv_%sx1" % str(self._conv_width))
Example #10
Source File: discretization.py From tensor2tensor with Apache License 2.0 | 6 votes |
def get_vq_codebook(codebook_size, hidden_size):
  """Get lookup table for VQ bottleneck."""
  with tf.variable_scope("vq", reuse=tf.AUTO_REUSE):
    means = tf.get_variable(
        name="means",
        shape=[codebook_size, hidden_size],
        initializer=tf.uniform_unit_scaling_initializer())
    ema_count = tf.get_variable(
        name="ema_count",
        shape=[codebook_size],
        initializer=tf.constant_initializer(0),
        trainable=False)
    with tf.colocate_with(means):
      ema_means = tf.get_variable(
          name="ema_means",
          initializer=means.initialized_value(),
          trainable=False)
    return means, ema_means, ema_count
Example #11
Source File: tiled_linear.py From lamb with Apache License 2.0 | 6 votes |
def _build_tiled_linear(self, inputs, input_name_and_sizes,
                        output_name_and_sizes, add_bias):
  results = []
  for output_name, output_size in output_name_and_sizes:
    r = 0.0
    for input_, (input_name, input_size) in zip(inputs, input_name_and_sizes):
      name = 'W_{}_{}'.format(input_name, output_name)
      weight = self._get_variable(name, shape=[output_size, input_size])
      r += tf.sparse_tensor_dense_matmul(weight, input_, adjoint_b=True)
    r = tf.transpose(r)
    if add_bias:
      # Biases are dense, hence we call _get_variable of the base class.
      r += super(SparseTiledLinear, self)._get_variable(
          'B_{}'.format(output_name), shape=[output_size],
          default_initializer=tf.zeros_initializer())
    results.append(r)
  return results

# TODO(melisgl): Since computation is the same as in TiledLinear,
# perhaps this should be implemented as a custom getter (see
# tf.get_variable) instead of being tied to tiling.
Example #12
Source File: common_image_attention.py From tensor2tensor with Apache License 2.0 | 6 votes |
def get_channel_embeddings(io_depth, targets, hidden_size, name="channel"):
  """Get separate embedding for each of the channels."""
  targets_split = tf.split(targets, io_depth, axis=3)
  rgb_embedding_var = tf.get_variable("rgb_target_emb_%s" % name,
                                      [256 * io_depth, hidden_size])
  rgb_embedding_var = tf.identity(rgb_embedding_var)
  rgb_embedding_var *= float(hidden_size)**0.5
  channel_target_embs = []
  for i in range(io_depth):
    # Adding the channel offsets to get the right embedding since the
    # embedding tensor has shape [256 * io_depth, hidden_size].
    target_ids = tf.squeeze(targets_split[i], axis=3) + i * 256
    target_embs = common_layers.gather(rgb_embedding_var, target_ids)
    channel_target_embs.append(target_embs)
  return tf.concat(channel_target_embs, axis=-1)
Example #13
Source File: tpu_util.py From morph-net with Apache License 2.0 | 6 votes |
def write_to_variable(tensor, fail_if_exists=True):
  """Saves a tensor for later retrieval on CPU."""
  # Only relevant for debugging.
  debug_name = 'tpu_util__' + tensor.name.split(':')[0]

  reuse = False if fail_if_exists else tf.compat.v1.AUTO_REUSE
  with tf.variable_scope(top_level_scope, reuse=reuse):
    variable = tf.get_variable(
        name=debug_name,
        shape=tensor.shape,
        dtype=tensor.dtype,
        trainable=False,
        use_resource=True)

  var_store[tensor] = variable
  with tf.control_dependencies([variable.assign(tensor)]):
    tensor_copy = tf.identity(tensor)
  var_store[tensor_copy] = variable
  return tensor_copy
Example #14
Source File: batch_norm_source_op_handler_test.py From morph-net with Apache License 2.0 | 6 votes |
def testCreateRegularizer_Sliced(self):
  # Call handler to create regularizer.
  handler = batch_norm_source_op_handler.BatchNormSourceOpHandler(
      _GAMMA_THRESHOLD)
  batch_norm_op_slice = orm.OpSlice(self.batch_norm_op, orm.Slice(0, 3))
  regularizer = handler.create_regularizer(batch_norm_op_slice)

  # Verify regularizer is the gamma tensor.
  with self.cached_session():
    # Initialize the gamma tensor to check value equality.
    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
      gamma_tensor = tf.get_variable('conv1/BatchNorm/gamma')
    init = tf.variables_initializer([gamma_tensor])
    init.run()

    # Verify regularizer is the sliced gamma tensor.
    self.assertAllEqual(gamma_tensor.eval()[0:3],
                        regularizer._gamma.eval())
Example #15
Source File: utils.py From lamb with Apache License 2.0 | 6 votes |
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """DOC."""
  reduction_indices = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, reduction_indices, keep_dims=True)
  variance = tf.reduce_mean(tf.squared_difference(x, mean),
                            reduction_indices, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  dtype = x.dtype
  shape = x.get_shape().as_list()
  for i in six.moves.range(len(shape)):
    if i not in reduction_indices or not per_element:
      shape[i] = 1
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                             initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                             initializer=tf.zeros_initializer())
  return gain * normalized + bias
Example #16
Source File: evaluator.py From graphics with Apache License 2.0 | 6 votes |
def _init_graph(self):
  """Initialize computation graph for tensorflow."""
  with self.graph.as_default():
    self.refiner = im.ImNet(dim=self.dim,
                            in_features=self.codelen,
                            out_features=self.out_features,
                            num_filters=self.num_filters)
    self.global_step = tf.get_variable('global_step', shape=[],
                                       dtype=tf.int64)

    self.pts_ph = tf.placeholder(tf.float32, shape=[self.point_batch, 3])
    self.lat_ph = tf.placeholder(tf.float32, shape=[self.codelen])

    lat = tf.broadcast_to(self.lat_ph[tf.newaxis],
                          [self.point_batch, self.codelen])
    code = tf.concat((self.pts_ph, lat), axis=-1)  # [pb, 3+c]

    vals = self.refiner(code, training=False)  # [pb, 1]
    self.vals = tf.squeeze(vals, axis=1)  # [pb]
    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.saver.restore(self.sess, self.ckpt)
Example #17
Source File: evaluator.py From graphics with Apache License 2.0 | 6 votes |
def _init_graph(self):
  """Initialize computation graph for tensorflow."""
  with self.graph.as_default():
    self.encoder = g2v.GridEncoder(
        in_grid_res=self.in_grid_res,
        num_filters=self.num_filters,
        codelen=self.codelen,
        name='g2v')
    self.global_step = tf.get_variable(
        'global_step', shape=[], dtype=tf.int64)
    self.grid_ph = tf.placeholder(
        tf.float32, shape=[self.gres, self.gres, self.gres])
    self.start_ph = tf.placeholder(tf.int32, shape=[self.grid_batch, 3])
    self.ingrid = self._batch_slice(self.grid_ph, self.start_ph,
                                    self.in_grid_res, self.grid_batch)
    self.ingrid = self.ingrid[..., tf.newaxis]
    self.lats = self.encoder(self.ingrid, training=False)  # [gb, codelen]
    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.saver.restore(self.sess, self.ckpt)
Example #18
Source File: averaged.py From lamb with Apache License 2.0 | 6 votes |
def __init__(self, tensors):
  tensors = list(tensors)
  with tf.variable_scope('averaged'):
    self._num_samples = tf.Variable(0, name='num_samples', trainable=False)
    with tf.variable_scope('avg'):
      self._averages = [
          tf.get_variable(
              tensor.name.replace('/', '-').replace(':', '-'),
              tensor.get_shape(), initializer=tf.zeros_initializer(),
              trainable=False)
          for tensor in tensors]
    with tf.variable_scope('save'):
      self._saves = [
          tf.get_variable(
              tensor.name.replace('/', '-').replace(':', '-'),
              tensor.get_shape(), initializer=tf.zeros_initializer(),
              trainable=False)
          for tensor in tensors]
  self._tensors = tensors
  self._take_sample = self._make_take_sample()
  self._switch = self._make_swith_to_average()
  self._restore = self._make_restore()
  self._reset = self._make_reset()
Example #19
Source File: optimization_test.py From albert with Apache License 2.0 | 6 votes |
def test_adam(self):
  with self.test_session() as sess:
    w = tf.get_variable(
        "w",
        shape=[3],
        initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
    x = tf.constant([0.4, 0.2, -0.5])
    loss = tf.reduce_mean(tf.square(x - w))
    tvars = tf.trainable_variables()
    grads = tf.gradients(loss, tvars)
    global_step = tf.train.get_or_create_global_step()
    optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
    train_op = optimizer.apply_gradients(list(zip(grads, tvars)), global_step)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    for _ in range(100):
      sess.run(train_op)
    w_np = sess.run(w)
    self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)
Example #20
Source File: export_checkpoints.py From albert with Apache License 2.0 | 6 votes |
def get_mlm_logits(input_tensor, albert_config, mlm_positions, output_weights):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(input_tensor, mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits
Example #21
Source File: convnet_builder.py From benchmarks with Apache License 2.0 | 6 votes |
def _conv2d_impl(self, input_layer, num_channels_in, filters, kernel_size,
                 strides, padding, kernel_initializer):
  if self.use_tf_layers:
    return conv_layers.conv2d(input_layer, filters, kernel_size, strides,
                              padding, self.channel_pos,
                              kernel_initializer=kernel_initializer,
                              use_bias=False)
  else:
    weights_shape = [kernel_size[0], kernel_size[1], num_channels_in, filters]
    # We use the name 'conv2d/kernel' so the variable has the same name as
    # its tf.layers equivalent. This way, if a checkpoint is written when
    # self.use_tf_layers == True, it can be loaded when
    # self.use_tf_layers == False, and vice versa.
    weights = self.get_variable('conv2d/kernel', weights_shape,
                                self.variable_dtype, self.dtype,
                                initializer=kernel_initializer)
    if self.data_format == 'NHWC':
      strides = [1] + strides + [1]
    else:
      strides = [1, 1] + strides
    return tf.nn.conv2d(input_layer, weights, strides, padding,
                        data_format=self.data_format)
Example #22
Source File: sv2p.py From tensor2tensor with Apache License 2.0 | 6 votes |
def init_internal_states(self):
  # Hardcoded LSTM-CONV shapes.
  # These sizes are calculated based on original atari frames.
  # TODO(mbz): find a cleaner way of doing this maybe?!
  batch_size = self.hparams.batch_size
  shapes = [(batch_size, 53, 40, 8),
            (batch_size, 53, 40, 8),
            (batch_size, 27, 20, 16),
            (batch_size, 27, 20, 16),
            (batch_size, 53, 40, 8)]

  with tf.variable_scope("clean_scope"):
    # Initialize conv-lstm states with zeros
    init = tf.zeros_initializer()
    states = []
    for i, shape in enumerate(shapes):
      # every lstm-conv state has two variables named c and h.
      c = tf.get_variable("c%d" % i, shape, trainable=False, initializer=init)
      h = tf.get_variable("h%d" % i, shape, trainable=False, initializer=init)
      states.append((c, h))
    return states
Example #23
Source File: utils.py From gpt2-ml with Apache License 2.0 | 5 votes |
def layer_norm(input_tensor, name=None, epsilon=1e-5):
  """Run layer normalization on the last dimension of the tensor."""
  name2use = f'LayerNorm_{name}' if name is not None else name
  with tf.variable_scope(name2use, default_name='LayerNorm'):
    dim = input_tensor.shape[-1].value
    gamma = tf.get_variable('gamma', [dim],
                            initializer=tf.constant_initializer(1))
    beta = tf.get_variable('beta', [dim],
                           initializer=tf.constant_initializer(0))
    mean = tf.reduce_mean(input_tensor, axis=-1, keepdims=True)
    std = tf.reduce_mean(tf.square(input_tensor - mean),
                         axis=-1, keepdims=True)
    input_tensor = (input_tensor - mean) * tf.rsqrt(std + epsilon)
    input_tensor = input_tensor * gamma + beta
  return input_tensor
Example #24
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def embedding(x,
              vocab_size,
              dense_size,
              name=None,
              reuse=None,
              multiplier=1.0,
              symbol_dropout_rate=0.0,
              embedding_var=None,
              dtype=tf.float32):
  """Embed x of type int64 into dense vectors, reducing to max 4 dimensions."""
  with tf.variable_scope(
      name, default_name="embedding", values=[x], reuse=reuse, dtype=dtype):
    if embedding_var is None:
      embedding_var = tf.get_variable("kernel", [vocab_size, dense_size])
    # On the backwards pass, we want to convert the gradient from
    # an indexed-slices to a regular tensor before sending it back to the
    # parameter server. This avoids excess computation on the parameter
    # server.
    if not tf.executing_eagerly():
      embedding_var = convert_gradient_to_tensor(embedding_var)
    x = dropout_no_scaling(x, 1.0 - symbol_dropout_rate)
    emb_x = gather(embedding_var, x, dtype)
    if multiplier != 1.0:
      emb_x *= multiplier
    static_shape = emb_x.shape.as_list()
    if len(static_shape) < 5:
      return emb_x
    assert len(static_shape) == 5
    # If we had an extra channel dimension, assume it's 1, i.e. shape[3] == 1.
    return tf.squeeze(emb_x, 3)
Example #25
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def nalu(x, depth, epsilon=1e-30, name=None, reuse=None):
  """NALU as in https://arxiv.org/abs/1808.00508."""
  with tf.variable_scope(name, default_name="nalu", values=[x], reuse=reuse):
    x_shape = shape_list(x)
    x_flat = tf.reshape(x, [-1, x_shape[-1]])
    gw = tf.get_variable("w", [x_shape[-1], depth])
    g = tf.nn.sigmoid(tf.matmul(x_flat, gw))
    g = tf.reshape(g, x_shape[:-1] + [depth])
    a = nac(x, depth, name="nac_lin")
    log_x = tf.log(tf.abs(x) + epsilon)
    m = nac(log_x, depth, name="nac_log")
    return g * a + (1 - g) * tf.exp(m)
Example #26
Source File: vq_discrete.py From tensor2tensor with Apache License 2.0 | 5 votes |
def __init__(self, hparams):
  self.hparams = hparams
  print("self.hparams.z_size", self.hparams.z_size)
  # Set the discretization bottleneck specific things here
  self.hparams.z_size_per_residual = (self.hparams.z_size //
                                      self.hparams.num_residuals)
  print("self.hparams.num_residuals", self.hparams.num_residuals)
  self.hparams.block_dim = int(
      self.hparams.hidden_size // self.hparams.num_blocks)
  self.hparams.block_v_size = 2**(
      self.hparams.z_size_per_residual / self.hparams.num_blocks)
  self.hparams.block_v_size = int(self.hparams.block_v_size)
  self.means = tf.get_variable(
      name="means",
      shape=[
          self.hparams.num_blocks, self.hparams.block_v_size,
          self.hparams.block_dim
      ],
      initializer=tf.initializers.variance_scaling(distribution="uniform"))

  # Create the shadow variables if we are using EMA
  if self.hparams.ema:
    self.ema_count = tf.get_variable(
        "ema_count", [self.hparams.num_blocks, self.hparams.block_v_size],
        initializer=tf.constant_initializer(0),
        trainable=False)
    with tf.colocate_with(self.means):
      self.ema_means = tf.get_variable(
          "ema_means",
          initializer=self.means.initialized_value(),
          trainable=False)
Example #27
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def nac(x, depth, name=None, reuse=None):
  """NAC as in https://arxiv.org/abs/1808.00508."""
  with tf.variable_scope(name, default_name="nac", values=[x], reuse=reuse):
    x_shape = shape_list(x)
    w = tf.get_variable("w", [x_shape[-1], depth])
    m = tf.get_variable("m", [x_shape[-1], depth])
    w = tf.tanh(w) * tf.nn.sigmoid(m)
    x_flat = tf.reshape(x, [-1, x_shape[-1]])
    res_flat = tf.matmul(x_flat, w)
    return tf.reshape(res_flat, x_shape[:-1] + [depth])
Example #28
Source File: modalities.py From tensor2tensor with Apache License 2.0 | 5 votes |
def get_weights(model_hparams, vocab_size, hidden_dim=None):
  """Create or get concatenated embedding or softmax variable.

  Args:
    model_hparams: HParams, model hyperparameters.
    vocab_size: int, vocabulary size.
    hidden_dim: dim of the variable. Defaults to model_hparams' hidden_size.

  Returns:
    a list of num_shards Tensors.
  """
  if hidden_dim is None:
    hidden_dim = model_hparams.hidden_size
  num_shards = model_hparams.symbol_modality_num_shards
  shards = []
  for i in range(num_shards):
    shard_size = (vocab_size // num_shards) + (
        1 if i < vocab_size % num_shards else 0)
    var_name = "weights_%d" % i
    shards.append(
        tf.get_variable(
            var_name, [shard_size, hidden_dim],
            initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5)))
  if num_shards == 1:
    ret = shards[0]
  else:
    ret = tf.concat(shards, 0)
  # Convert ret to tensor.
  if not tf.executing_eagerly():
    ret = common_layers.convert_gradient_to_tensor(ret)
  return ret
Example #29
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def l2_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None):
  """Layer normalization with l2 norm."""
  if filters is None:
    filters = shape_list(x)[-1]
  with tf.variable_scope(name, default_name="l2_norm", values=[x], reuse=reuse):
    scale = tf.get_variable(
        "l2_norm_scale", [filters], initializer=tf.ones_initializer())
    bias = tf.get_variable(
        "l2_norm_bias", [filters], initializer=tf.zeros_initializer())
    epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
    mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    l2norm = tf.reduce_sum(
        tf.squared_difference(x, mean), axis=[-1], keepdims=True)
    norm_x = (x - mean) * tf.rsqrt(l2norm + epsilon)
    return norm_x * scale + bias
Example #30
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def layer_norm_vars(filters):
  """Create Variables for layer norm."""
  scale = tf.get_variable(
      "layer_norm_scale", [filters], initializer=tf.ones_initializer())
  bias = tf.get_variable(
      "layer_norm_bias", [filters], initializer=tf.zeros_initializer())
  return scale, bias