Python tensorflow.compat.v1.matmul() Examples
The following are 30 code examples of tensorflow.compat.v1.matmul(), each taken from an open-source project; the source file and project are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.compat.v1, or try the search function.
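Before the project examples, here is a minimal sketch of the basic call in TF1 graph mode. The shapes, variable names, and the explicit Session are illustrative assumptions, not taken from any of the projects below.

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # the examples below all build TF1-style graphs

# [2, 3] x [3, 4] -> [2, 4]
x = tf.constant(np.arange(6, dtype=np.float32).reshape(2, 3))
w = tf.constant(np.ones((3, 4), dtype=np.float32))
y = tf.matmul(x, w)

# transpose_b lets the second operand be stored transposed, here [4, 3].
w_t = tf.constant(np.ones((4, 3), dtype=np.float32))
y_t = tf.matmul(x, w_t, transpose_b=True)

with tf.Session() as sess:
  print(sess.run(y))    # shape (2, 4)
  print(sess.run(y_t))  # shape (2, 4)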
Example #1
Source File: flop_regularizer_test.py From morph-net with Apache License 2.0 | 6 votes |
def testFlopRegularizerDontConvertToVariable(self):
  tf.reset_default_graph()
  tf.set_random_seed(1234)

  x = tf.constant(1.0, shape=[2, 6], name='x', dtype=tf.float32)
  w = tf.Variable(tf.truncated_normal([6, 4], stddev=1.0), use_resource=True)
  net = tf.matmul(x, w)

  # Create FLOPs network regularizer.
  threshold = 0.9
  flop_reg = flop_regularizer.GroupLassoFlopsRegularizer(
      [net.op], threshold, 0)

  with self.cached_session():
    tf.global_variables_initializer().run()
    flop_reg.get_regularization_term().eval()
Example #2
Source File: run_pretraining.py From albert with Apache License 2.0 | 6 votes |
def get_sentence_order_output(albert_config, input_tensor, labels):
  """Get loss and log probs for the next sentence prediction."""
  # Simple binary classification. Note that 0 is "next sentence" and 1 is
  # "random sentence". This weight matrix is not used after pre-training.
  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights",
        shape=[2, albert_config.hidden_size],
        initializer=modeling.create_initializer(
            albert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias", shape=[2], initializer=tf.zeros_initializer())

    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, log_probs)
Example #3
Source File: utils.py From magenta with Apache License 2.0 | 6 votes |
def linear(x, n_inputs, n_outputs, name):
  """Simple linear layer.

  Args:
    x: The [mb, time, channels] tensor input.
    n_inputs: The input number of channels.
    n_outputs: The output number of channels.
    name: The variable scope to provide to W and biases.

  Returns:
    y: The output of the operation.
  """
  w = tf.get_variable(
      name=name + "/W", shape=[1, 1, n_inputs, n_outputs], dtype=tf.float32)
  b = tf.get_variable(
      name=name + "/biases", shape=[n_outputs], dtype=tf.float32)
  y = tf.nn.bias_add(tf.matmul(x[:, 0, :], w[0][0]), b)
  y = tf.expand_dims(y, 1)
  return y
Example #4
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def smoothing_cross_entropy_factored(a, b, labels, confidence):
  """Memory-efficient computation of smoothing cross-entropy.

  Avoids realizing the entire logits matrix at once.

  Args:
    a: a Tensor with shape [batch, inner_dim]
    b: a Tensor with shape [vocab_size, inner_dim]
    labels: an integer Tensor with shape [batch]
    confidence: a float

  Returns:
    A Tensor with shape [batch]
  """
  num_splits = 16
  vocab_size = shape_list(b)[0]
  labels = approximate_split(labels, num_splits)
  a = approximate_split(a, num_splits)
  parts = []
  for part in range(num_splits):
    with tf.control_dependencies(parts[-1:]):
      logits = tf.matmul(a[part], b, transpose_b=True)
      parts.append(
          smoothing_cross_entropy(logits, labels[part], vocab_size,
                                  confidence))
  return tf.concat(parts, 0)
Example #5
Source File: export_to_tfhub.py From albert with Apache License 2.0 | 6 votes |
def get_mlm_logits(model, albert_config, mlm_positions):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(model.get_sequence_output(), mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(
        input_tensor, model.get_embedding_table(), transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits
Example #6
Source File: nade.py From magenta with Apache License 2.0 | 6 votes |
def _cond_prob(self, a, w_dec_i, b_dec_i):
  """Gets the conditional probability for a single dimension.

  Args:
    a: Model's hidden state, sized `[batch_size, num_hidden]`.
    w_dec_i: The decoder weight terms for the dimension, sized
      `[num_hidden, 1]`.
    b_dec_i: The decoder bias terms, sized `[batch_size, 1]`.

  Returns:
    cond_p_i: The conditional probability of the dimension, sized
      `[batch_size, 1]`.
    cond_l_i: The conditional logits of the dimension, sized
      `[batch_size, 1]`.
  """
  # Decode hidden units to get conditional probability.
  h = tf.sigmoid(a)
  cond_l_i = b_dec_i + tf.matmul(h, w_dec_i)
  cond_p_i = tf.sigmoid(cond_l_i)
  return cond_p_i, cond_l_i
Example #7
Source File: transformer_glow_layers_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def dense_weightnorm(
    name, x, n_out, x_mask, init_scale, init, dtype=tf.float32):
  """Dense layer with weight normalization."""
  n_in = common_layers.shape_list(x)[2]
  eps = tf.keras.backend.epsilon()
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    v = tf.get_variable(
        "v", [n_in, n_out], dtype,
        initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
    v = v / tf.norm(v, axis=0, keepdims=True)
    t = tf.matmul(x, v)  # [B, L, n_out]
    mean, var = moments_over_bl(t, x_mask)
    g_init = init_scale / (tf.sqrt(var) + eps)
    g = get_variable_ddi(
        "g", [n_out], g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    b = get_variable_ddi(
        "b", [n_out], -mean * g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    w = g * v
    y = tf.matmul(x, w) + b
    tf.summary.histogram("_g", g)
    return y
Example #8
Source File: relative_bounds.py From interval-bound-propagation with Apache License 2.0 | 6 votes |
def apply_linear(self, wrapper, w, b):
  """Propagates the bounds through a linear layer.

  Args:
    wrapper: Contains prior bounds from a previous iteration.
    w: 2D tensor of shape (input_size, output_size) containing
      weights for the linear layer.
    b: 1D tensor of shape (output_size) containing biases for the linear
      layer, or `None` if no bias.

  Returns:
    Output bounds.
  """
  w_pos = tf.maximum(w, 0)
  w_neg = tf.minimum(w, 0)
  lb = (tf.matmul(self.lower_offset, w_pos) +
        tf.matmul(self.upper_offset, w_neg))
  ub = (tf.matmul(self.upper_offset, w_pos) +
        tf.matmul(self.lower_offset, w_neg))

  nominal_out = tf.matmul(self.nominal, w)
  if b is not None:
    nominal_out += b

  return RelativeIntervalBounds(lb, ub, nominal_out)
Example #9
Source File: vq_discrete.py From tensor2tensor with Apache License 2.0 | 6 votes |
def embedding_lookup(self, x, means):
  """Compute nearest neighbors and loss for training the embeddings.

  Args:
    x: Batch of encoder continuous latent states sliced/projected into
      shape [-1, num_blocks, block_dim].
    means: Embedding means.

  Returns:
    The nearest neighbor in one hot form, the nearest neighbor itself, the
    commitment loss, embedding training loss.
  """
  x_means_hot = self.nearest_neighbor(x, means)
  x_means_hot_flat = tf.reshape(
      x_means_hot, [-1, self.hparams.num_blocks, self.hparams.block_v_size])
  x_means = tf.matmul(tf.transpose(x_means_hot_flat, perm=[1, 0, 2]), means)
  x_means = tf.transpose(x_means, [1, 0, 2])
  q_loss = tf.reduce_mean(
      tf.squared_difference(tf.stop_gradient(x), x_means))
  e_loss = tf.reduce_mean(
      tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, x_means, q_loss, e_loss
Example #10
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_average_embedding(input_embeddings, input_lengths):
  """Computes bag-of-words embedding.

  Args:
    input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
    input_lengths: <tf.int64>[bs, 1]

  Returns:
    bow_embedding: <tf.float32>[bs, emb_dim]
  """
  max_seq_len = tf.shape(input_embeddings)[1]
  # <tf.float32>[bs, 1, max_seq_len]
  mask = tf.sequence_mask(input_lengths, max_seq_len, dtype=tf.float32)
  # <tf.float32>[bs, 1, emb_dim]
  sum_embedding = tf.matmul(mask, input_embeddings)
  # <tf.float32>[bs, 1, emb_dim]
  avg_embedding = sum_embedding / tf.to_float(tf.expand_dims(input_lengths, 2))
  # <tf.float32>[bs, dim]
  return tf.squeeze(avg_embedding, 1)
Example #11
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_last_embedding(input_embeddings, input_lengths, hparams):
  """Computes average of last K embedding.

  Args:
    input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
    input_lengths: <tf.int64>[bs, 1]
    hparams: model hparams

  Returns:
    last_k_embedding: <tf.float32>[bs, emb_dim]
  """
  max_seq_len = tf.shape(input_embeddings)[1]
  # <tf.float32>[bs, 1, max_seq_len]
  mask = tf.sequence_mask(input_lengths, max_seq_len, dtype=tf.float32)
  del_mask = tf.sequence_mask(
      input_lengths - hparams.last_k, max_seq_len, dtype=tf.float32)
  final_mask = mask - del_mask
  # <tf.float32>[bs, 1, emb_dim]
  sum_embedding = tf.matmul(final_mask, input_embeddings)
  # <tf.float32>[bs, 1, emb_dim]
  last_k_embedding = sum_embedding / tf.to_float(
      tf.expand_dims(
          tf.ones([tf.shape(input_embeddings)[0], 1]) * hparams.last_k, 2))
  # <tf.float32>[bs, dim]
  return tf.squeeze(last_k_embedding, 1)
Example #12
Source File: vqa_attention.py From tensor2tensor with Apache License 2.0 | 6 votes |
def attn(image_feat, query, hparams, name="attn"):
  """Attention on image feature with question as query."""
  with tf.variable_scope(name, "attn", values=[image_feat, query]):
    attn_dim = hparams.attn_dim
    num_glimps = hparams.num_glimps
    num_channels = common_layers.shape_list(image_feat)[-1]
    if len(common_layers.shape_list(image_feat)) == 4:
      image_feat = common_layers.flatten4d3d(image_feat)
    query = tf.expand_dims(query, 1)
    image_proj = common_attention.compute_attention_component(
        image_feat, attn_dim, name="image_proj")
    query_proj = common_attention.compute_attention_component(
        query, attn_dim, name="query_proj")
    h = tf.nn.relu(image_proj + query_proj)
    h_proj = common_attention.compute_attention_component(
        h, num_glimps, name="h_proj")
    p = tf.nn.softmax(h_proj, axis=1)
    image_ave = tf.matmul(image_feat, p, transpose_a=True)
    image_ave = tf.reshape(image_ave, [-1, num_channels * num_glimps])

    return image_ave
Example #13
Source File: message_passing_attention.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_values(edge_compatibility, v):
  """Compute values. If edge compatibilities is just adjacency, we get ggnn.

  Args:
    edge_compatibility: A tensor of shape [batch, num_transforms, length, depth]
    v: A tensor of shape [batch, num_transforms, length, depth]

  Returns:
    output: A [batch, length, depth] tensor
  """
  # Computes the incoming value vectors for each node by weighting them
  # according to the attention weights. These values are still segregated by
  # edge type.
  # Shape = [B, T, N, V].
  all_edge_values = tf.matmul(tf.to_float(edge_compatibility), v)
  # Combines the weighted value vectors together across edge types into a
  # single N x V matrix for each batch.
  output = tf.reduce_sum(all_edge_values, axis=1)  # Shape [B, N, V].
  return output
Example #14
Source File: transformer_nat.py From tensor2tensor with Apache License 2.0 | 6 votes |
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #15
Source File: export_checkpoints.py From albert with Apache License 2.0 | 6 votes |
def get_sentence_order_logits(input_tensor, albert_config):
  """Get loss and log probs for the next sentence prediction."""
  # Simple binary classification. Note that 0 is "next sentence" and 1 is
  # "random sentence". This weight matrix is not used after pre-training.
  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights",
        shape=[2, albert_config.hidden_size],
        initializer=modeling.create_initializer(
            albert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias", shape=[2], initializer=tf.zeros_initializer())

    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    return logits
Example #16
Source File: export_checkpoints.py From albert with Apache License 2.0 | 6 votes |
def get_mlm_logits(input_tensor, albert_config, mlm_positions, output_weights):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(input_tensor, mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits
Example #17
Source File: matmul_source_op_handler_test.py From morph-net with Apache License 2.0 | 6 votes |
def testMatMul2D(self, size):
  inputs = tf.zeros((13, 2))
  handler = matmul_source_op_handler.MatMulSourceOpHandler(0.1)

  kernel = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32)
  x = tf.matmul(inputs, kernel, transpose_b=False, name='MatMul')
  op_slice = orm.OpSlice(x.op, orm.Slice(0, size))

  transpose_kernel = tf.constant([[1, 4], [2, 5], [3, 6]], dtype=tf.float32)
  x_other = tf.matmul(
      inputs, transpose_kernel, transpose_b=True,
      name='MatMulTransposedKernel')
  op_slice_other = orm.OpSlice(x_other.op, orm.Slice(0, size))

  # The regularization vector should not depend on whether the kernel is
  # stored transposed.
  self.assertAllClose(
      handler.create_regularizer(op_slice).regularization_vector,
      handler.create_regularizer(op_slice_other).regularization_vector)
Example #18
Source File: util.py From nni with MIT License | 6 votes |
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """lstm cell"""
    _, nin = [v.value for v in xs[0].get_shape()]  # the first is nbatch
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
Example #19
Source File: tiled_linear.py From lamb with Apache License 2.0 | 6 votes |
def _build_tiled_linear(self, inputs, input_name_and_sizes,
                        output_name_and_sizes, add_bias):
  # pylint: disable=missing-docstring
  def split_output(output):
    if len(output_name_and_sizes) == 1:
      return output
    elif len(set([size for _, size in output_name_and_sizes])) == 1:
      # This is a bit faster than several tf.slice calls.
      return tf.split(output, len(output_name_and_sizes), axis=1)
    else:
      outputs = []
      offset = 0
      for _, output_size in output_name_and_sizes:
        outputs.append(tf.slice(output, [0, offset], [-1, output_size]))
        offset += output_size
      return outputs

  weights = self._ensure_weights()
  if len(inputs) > 1:
    inputs = tf.concat(inputs, 1)
  if add_bias:
    biases = self._ensure_biases()
    return split_output(tf.nn.xw_plus_b(inputs, weights, biases))
  else:
    return split_output(tf.matmul(inputs, weights))
Example #20
Source File: transformer_memory.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _address_content(self, x):
  """Address the memory based on content similarity.

  Args:
    x: a tensor in the shape of [batch_size, length, depth].

  Returns:
    the logits for each memory entry [batch_size, length, memory_size].
  """
  mem_keys = tf.layers.dense(self.mem_vals, self.key_depth,
                             bias_initializer=tf.constant_initializer(1.0),
                             name="mem_key")
  mem_query = tf.layers.dense(x, self.key_depth,
                              bias_initializer=tf.constant_initializer(1.0),
                              name="mem_query")
  norm = tf.matmul(self._norm(mem_query), self._norm(mem_keys),
                   transpose_b=True)
  dot_product = tf.matmul(mem_query, mem_keys, transpose_b=True)
  cos_dist = tf.div(dot_product, norm + 1e-7, name="cos_dist")
  access_logits = self.sharpen_factor * cos_dist
  return access_logits
Example #21
Source File: fastlin.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def apply_linear(self, wrapper, w, b):
  # Propagate the symbolic bounds without the bias term; the bias only
  # shifts the nominal output, not the offsets around it.
  bounds_out = super(RelativeSymbolicBounds, self).apply_linear(
      wrapper, w, b=None)

  nominal_out = tf.matmul(self._nominal, w)
  if b is not None:
    nominal_out += b

  return RelativeSymbolicBounds(
      bounds_out.lower, bounds_out.upper, nominal_out).with_priors(
          wrapper.output_bounds)
Example #22
Source File: modeling.py From albert with Apache License 2.0 | 5 votes |
def dense_layer_2d(input_tensor,
                   output_size,
                   initializer,
                   activation,
                   use_einsum,
                   num_attention_heads=1,
                   name=None):
  """A dense layer with 2D kernel.

  Args:
    input_tensor: Float tensor with rank 3.
    output_size: The size of output dimension.
    initializer: Kernel initializer.
    activation: Activation function.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.
    num_attention_heads: number of attention head in attention layer.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """
  del num_attention_heads  # unused
  input_shape = get_shape_list(input_tensor)
  hidden_size = input_shape[2]
  with tf.variable_scope(name):
    w = tf.get_variable(
        name="kernel",
        shape=[hidden_size, output_size],
        initializer=initializer)
    b = tf.get_variable(
        name="bias", shape=[output_size], initializer=tf.zeros_initializer)
    if use_einsum:
      ret = tf.einsum("BFH,HO->BFO", input_tensor, w)
    else:
      ret = tf.matmul(input_tensor, w)
  ret += b
  if activation is not None:
    return activation(ret)
  else:
    return ret
Example #23
Source File: model.py From gpt2-estimator with MIT License | 5 votes |
def conv1d(x, scope, nf, *, w_init_stdev=0.02):
    # A kernel-size-1 "convolution": a position-wise linear projection
    # implemented as a reshape followed by a matmul.
    with tf.variable_scope(scope):
        *start, nx = shape_list(x)
        w = tf.get_variable(
            'w', [1, nx, nf],
            initializer=tf.random_normal_initializer(stddev=w_init_stdev))
        b = tf.get_variable('b', [nf], initializer=tf.constant_initializer(0))
        c = tf.reshape(
            tf.matmul(tf.reshape(x, [-1, nx]), tf.reshape(w, [-1, nf])) + b,
            start + [nf])
        return c
Example #24
Source File: snail_test.py From tensor2robot with Apache License 2.0 | 5 votes |
def test_CausallyMaskedSoftmax(self):
  num_rows = 5
  x = tf.random.normal((num_rows, 3))
  logits = tf.matmul(x, tf.linalg.transpose(x))
  y = snail.CausallyMaskedSoftmax(logits)
  with self.test_session() as sess:
    y_ = sess.run(y)
    idx = np.triu_indices(num_rows, 1)
    np.testing.assert_array_equal(y_[idx], 0.)
    # Testing that each row sums to 1.
    for i in range(num_rows):
      np.testing.assert_almost_equal(np.sum(y_[i, :]), 1.0)
Example #25
Source File: snail.py From tensor2robot with Apache License 2.0 | 5 votes |
def AttentionBlock(x, key_size, value_size, scope=""):
  """Self-attention key-value lookup, styled after Vaswani et al. '17.

  query and key are of shape [T, K]. query * transpose(key) yields logits of
  shape [T, T]. logits[i, j] corresponds to unnormalized attention vector over
  values [T, V] for each timestep i. Because this attention is over a set of
  temporal values, we causally mask the pre-softmax logits[i, j] := 0, for all
  j > i.

  Citations:
    Vaswani et al. '17: Attention is All you need
      https://arxiv.org/abs/1706.03762.

  Args:
    x: Input tensor of shape [batch, sequence_length, channels].
    key_size: Integer key dimensionality.
    value_size: Integer value dimensionality.
    scope: Variable scope for this layer.

  Returns:
    result: Tensor of shape [batch, sequence_length, channels + value_size]
    end_points: Dictionary of intermediate values (e.g. debugging).
  """
  end_points = {}
  with tf.variable_scope(scope):
    key = layers.fully_connected(x, key_size, activation_fn=None)  # [T, K]
    query = layers.fully_connected(x, key_size, activation_fn=None)  # [T, K]
    logits = tf.matmul(query, key, transpose_b=True)  # [T, T]
    # Useful for visualizing attention alignment matrices.
    probs = CausallyMaskedSoftmax(logits / np.sqrt(key_size))  # [T, T]
    end_points["attn_prob"] = probs
    values = layers.fully_connected(x, value_size, activation_fn=None)  # [T, V]
    read = tf.matmul(probs, values)  # [T, V]
    result = tf.concat([x, read], axis=2)  # [T, K + V]
    return result, end_points
Example #26
Source File: rnn.py From magenta with Apache License 2.0 | 5 votes |
def super_linear(x,
                 output_size,
                 scope=None,
                 reuse=False,
                 init_w='ortho',
                 weight_start=0.0,
                 use_bias=True,
                 bias_start=0.0,
                 input_size=None):
  """Performs linear operation. Uses ortho init defined earlier."""
  shape = x.get_shape().as_list()
  with tf.variable_scope(scope or 'linear'):
    if reuse:
      tf.get_variable_scope().reuse_variables()

    w_init = None  # uniform
    if input_size is None:
      x_size = shape[1]
    else:
      x_size = input_size
    if init_w == 'zeros':
      w_init = tf.constant_initializer(0.0)
    elif init_w == 'constant':
      w_init = tf.constant_initializer(weight_start)
    elif init_w == 'gaussian':
      w_init = tf.random_normal_initializer(stddev=weight_start)
    elif init_w == 'ortho':
      w_init = lstm_ortho_initializer(1.0)

    w = tf.get_variable(
        'super_linear_w', [x_size, output_size], tf.float32,
        initializer=w_init)
    if use_bias:
      b = tf.get_variable(
          'super_linear_b', [output_size], tf.float32,
          initializer=tf.constant_initializer(bias_start))
      return tf.matmul(x, w) + b
    return tf.matmul(x, w)
Example #27
Source File: rnn.py From magenta with Apache License 2.0 | 5 votes |
def __call__(self, x, state, scope=None):
  with tf.variable_scope(scope or type(self).__name__):
    c, h = tf.split(state, 2, 1)

    x_size = x.get_shape().as_list()[1]

    w_init = None  # uniform
    h_init = lstm_ortho_initializer(1.0)

    # Keep W_xh and W_hh separate here as well to use different init methods.
    w_xh = tf.get_variable(
        'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
    w_hh = tf.get_variable(
        'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)
    bias = tf.get_variable(
        'bias', [4 * self.num_units],
        initializer=tf.constant_initializer(0.0))

    concat = tf.concat([x, h], 1)
    w_full = tf.concat([w_xh, w_hh], 0)
    hidden = tf.matmul(concat, w_full) + bias

    i, j, f, o = tf.split(hidden, 4, 1)

    if self.use_recurrent_dropout:
      g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
    else:
      g = tf.tanh(j)

    new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
    new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat([new_c, new_h], 1)  # fuk tuples.
Example #28
Source File: learning.py From magenta with Apache License 2.0 | 5 votes |
def gram_matrix(feature_maps):
  """Computes the Gram matrix for a set of feature maps."""
  batch_size, height, width, channels = tf.unstack(tf.shape(feature_maps))
  denominator = tf.to_float(height * width)
  feature_maps = tf.reshape(
      feature_maps, tf.stack([batch_size, height * width, channels]))
  matrix = tf.matmul(feature_maps, feature_maps, adjoint_a=True)
  return matrix / denominator
Example #29
Source File: models.py From graphics with Apache License 2.0 | 5 votes |
def _compute_sdf(self, x, translations, blend_terms, points):
  """Compute signed distances between query points and hyperplanes."""
  n_parts = tf.shape(x)[1]
  n_planes = tf.shape(x)[2]
  norm_logit = x[..., 0:self._dims - 1]
  offset = (-(tf.nn.sigmoid(x[..., self._dims - 1:self._dims]) *
              self._offset_scale + self._offset_lbound))
  blend_planes = (
      tf.nn.sigmoid(blend_terms[..., :n_parts]) * self._blend_scale +
      self._blend_lbound)

  # Norm of the boundary line
  norm_rad = tf.tanh(norm_logit) * np.pi  # [..., (azimuth, altitude)]
  if self._dims == 3:
    norm = tf.stack([
        tf.sin(norm_rad[..., 1]) * tf.cos(norm_rad[..., 0]),
        tf.sin(norm_rad[..., 1]) * tf.sin(norm_rad[..., 0]),
        tf.cos(norm_rad[..., 1])
    ], axis=-1)
  else:
    norm = tf.concat([tf.cos(norm_rad), tf.sin(norm_rad)], axis=-1)

  # Calculate signed distances to hyperplanes.
  points = (
      tf.expand_dims(points, axis=1) - tf.expand_dims(translations, axis=2))
  points = tf.expand_dims(points, axis=2)
  points = tf.tile(points, [1, 1, n_planes, 1, 1])
  signed_dis = tf.matmul(points, tf.expand_dims(norm, axis=-1))
  signed_dis = signed_dis + tf.expand_dims(offset, axis=-2)

  return signed_dis, translations, blend_planes, offset
Example #30
Source File: modeling.py From albert with Apache License 2.0 | 5 votes |
def dense_layer_3d_proj(input_tensor,
                        hidden_size,
                        head_size,
                        initializer,
                        activation,
                        use_einsum,
                        name=None):
  """A dense layer with 3D kernel for projection.

  Args:
    input_tensor: float Tensor of shape [batch, from_seq_length,
      num_attention_heads, size_per_head].
    hidden_size: The size of hidden layer.
    head_size: The size of head.
    initializer: Kernel initializer.
    activation: Activation function.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """
  input_shape = get_shape_list(input_tensor)
  num_attention_heads = input_shape[2]
  with tf.variable_scope(name):
    w = tf.get_variable(
        name="kernel",
        shape=[num_attention_heads * head_size, hidden_size],
        initializer=initializer)
    w = tf.reshape(w, [num_attention_heads, head_size, hidden_size])
    b = tf.get_variable(
        name="bias", shape=[hidden_size], initializer=tf.zeros_initializer)
    if use_einsum:
      ret = tf.einsum("BFND,NDH->BFH", input_tensor, w)
    else:
      ret = einsum_via_matmul(input_tensor, w, 2)
  ret += b
  if activation is not None:
    return activation(ret)
  else:
    return ret