Python tensorflow.compat.v1.matmul() Examples
The following are 30 code examples of tensorflow.compat.v1.matmul(), each taken from an open-source project; the source file and project are noted above each example. You may also want to check out all available functions and classes of the module tensorflow.compat.v1, or try the search function.
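Before the project examples, here is a minimal sketch of the basic call in TF1 graph mode. The shapes, variable names, and the explicit Session are illustrative assumptions, not taken from any of the projects below.

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # the examples below all build TF1-style graphs

# [2, 3] x [3, 4] -> [2, 4]
x = tf.constant(np.arange(6, dtype=np.float32).reshape(2, 3))
w = tf.constant(np.ones((3, 4), dtype=np.float32))
y = tf.matmul(x, w)

# transpose_b lets the second operand be stored transposed, here [4, 3].
w_t = tf.constant(np.ones((4, 3), dtype=np.float32))
y_t = tf.matmul(x, w_t, transpose_b=True)

with tf.Session() as sess:
  print(sess.run(y))    # shape (2, 4)
  print(sess.run(y_t))  # shape (2, 4)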
Example #1
Source File: flop_regularizer_test.py From morph-net with Apache License 2.0 | 6 votes |
def testFlopRegularizerDontConvertToVariable(self):
  tf.reset_default_graph()
  tf.set_random_seed(1234)

  x = tf.constant(1.0, shape=[2, 6], name='x', dtype=tf.float32)
  w = tf.Variable(tf.truncated_normal([6, 4], stddev=1.0), use_resource=True)
  net = tf.matmul(x, w)

  # Create FLOPs network regularizer.
  threshold = 0.9
  flop_reg = flop_regularizer.GroupLassoFlopsRegularizer(
      [net.op], threshold, 0)

  with self.cached_session():
    tf.global_variables_initializer().run()
    flop_reg.get_regularization_term().eval()
Example #2
Source File: run_pretraining.py From albert with Apache License 2.0 | 6 votes |
def get_sentence_order_output(albert_config, input_tensor, labels):
  """Get loss and log probs for the next sentence prediction."""
  # Simple binary classification. Note that 0 is "next sentence" and 1 is
  # "random sentence". This weight matrix is not used after pre-training.
  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights",
        shape=[2, albert_config.hidden_size],
        initializer=modeling.create_initializer(
            albert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias", shape=[2], initializer=tf.zeros_initializer())

    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, log_probs)
Example #3
Source File: utils.py From magenta with Apache License 2.0 | 6 votes |
def linear(x, n_inputs, n_outputs, name):
  """Simple linear layer.

  Args:
    x: The [mb, time, channels] tensor input.
    n_inputs: The input number of channels.
    n_outputs: The output number of channels.
    name: The variable scope to provide to W and biases.

  Returns:
    y: The output of the operation.
  """
  w = tf.get_variable(
      name=name + "/W", shape=[1, 1, n_inputs, n_outputs], dtype=tf.float32)
  b = tf.get_variable(
      name=name + "/biases", shape=[n_outputs], dtype=tf.float32)
  y = tf.nn.bias_add(tf.matmul(x[:, 0, :], w[0][0]), b)
  y = tf.expand_dims(y, 1)
  return y
Example #4
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def smoothing_cross_entropy_factored(a, b, labels, confidence):
  """Memory-efficient computation of smoothing cross-entropy.

  Avoids realizing the entire logits matrix at once.

  Args:
    a: a Tensor with shape [batch, inner_dim]
    b: a Tensor with shape [vocab_size, inner_dim]
    labels: an integer Tensor with shape [batch]
    confidence: a float

  Returns:
    A Tensor with shape [batch]
  """
  num_splits = 16
  vocab_size = shape_list(b)[0]
  labels = approximate_split(labels, num_splits)
  a = approximate_split(a, num_splits)
  parts = []
  for part in range(num_splits):
    with tf.control_dependencies(parts[-1:]):
      logits = tf.matmul(a[part], b, transpose_b=True)
      parts.append(
          smoothing_cross_entropy(logits, labels[part], vocab_size,
                                  confidence))
  return tf.concat(parts, 0)
Example #5
Source File: export_to_tfhub.py From albert with Apache License 2.0 | 6 votes |
def get_mlm_logits(model, albert_config, mlm_positions):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(model.get_sequence_output(), mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(
        input_tensor, model.get_embedding_table(), transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits
Example #6
Source File: nade.py From magenta with Apache License 2.0 | 6 votes |
def _cond_prob(self, a, w_dec_i, b_dec_i):
  """Gets the conditional probability for a single dimension.

  Args:
    a: Model's hidden state, sized `[batch_size, num_hidden]`.
    w_dec_i: The decoder weight terms for the dimension, sized
      `[num_hidden, 1]`.
    b_dec_i: The decoder bias terms, sized `[batch_size, 1]`.

  Returns:
    cond_p_i: The conditional probability of the dimension, sized
      `[batch_size, 1]`.
    cond_l_i: The conditional logits of the dimension, sized
      `[batch_size, 1]`.
  """
  # Decode hidden units to get conditional probability.
  h = tf.sigmoid(a)
  cond_l_i = b_dec_i + tf.matmul(h, w_dec_i)
  cond_p_i = tf.sigmoid(cond_l_i)
  return cond_p_i, cond_l_i
Example #7
Source File: transformer_glow_layers_ops.py From tensor2tensor with Apache License 2.0 | 6 votes |
def dense_weightnorm(
    name, x, n_out, x_mask, init_scale, init, dtype=tf.float32):
  """Dense layer with weight normalization."""
  n_in = common_layers.shape_list(x)[2]
  eps = tf.keras.backend.epsilon()
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    v = tf.get_variable(
        "v", [n_in, n_out], dtype,
        initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
    v = v / tf.norm(v, axis=0, keepdims=True)
    t = tf.matmul(x, v)  # [B, L, n_out]
    mean, var = moments_over_bl(t, x_mask)
    g_init = init_scale / (tf.sqrt(var) + eps)
    g = get_variable_ddi(
        "g", [n_out], g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    b = get_variable_ddi(
        "b", [n_out], -mean * g_init, init,
        initializer=tf.zeros_initializer, dtype=dtype, trainable=True)
    w = g * v
    y = tf.matmul(x, w) + b
    tf.summary.histogram("_g", g)
    return y
Example #8
Source File: relative_bounds.py From interval-bound-propagation with Apache License 2.0 | 6 votes |
def apply_linear(self, wrapper, w, b):
  """Propagates the bounds through a linear layer.

  Args:
    wrapper: Contains prior bounds from a previous iteration.
    w: 2D tensor of shape (input_size, output_size) containing
      weights for the linear layer.
    b: 1D tensor of shape (output_size) containing biases for the linear
      layer, or `None` if no bias.

  Returns:
    Output bounds.
  """
  w_pos = tf.maximum(w, 0)
  w_neg = tf.minimum(w, 0)
  lb = (tf.matmul(self.lower_offset, w_pos) +
        tf.matmul(self.upper_offset, w_neg))
  ub = (tf.matmul(self.upper_offset, w_pos) +
        tf.matmul(self.lower_offset, w_neg))

  nominal_out = tf.matmul(self.nominal, w)
  if b is not None:
    nominal_out += b

  return RelativeIntervalBounds(lb, ub, nominal_out)
Example #9
Source File: vq_discrete.py From tensor2tensor with Apache License 2.0 | 6 votes |
def embedding_lookup(self, x, means):
  """Compute nearest neighbors and loss for training the embeddings.

  Args:
    x: Batch of encoder continuous latent states sliced/projected into
      shape [-1, num_blocks, block_dim].
    means: Embedding means.

  Returns:
    The nearest neighbor in one hot form, the nearest neighbor itself, the
    commitment loss, embedding training loss.
  """
  x_means_hot = self.nearest_neighbor(x, means)
  x_means_hot_flat = tf.reshape(
      x_means_hot, [-1, self.hparams.num_blocks, self.hparams.block_v_size])
  x_means = tf.matmul(tf.transpose(x_means_hot_flat, perm=[1, 0, 2]), means)
  x_means = tf.transpose(x_means, [1, 0, 2])
  q_loss = tf.reduce_mean(
      tf.squared_difference(tf.stop_gradient(x), x_means))
  e_loss = tf.reduce_mean(
      tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, x_means, q_loss, e_loss
Example #10
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_average_embedding(input_embeddings, input_lengths):
  """Computes bag-of-words embedding.

  Args:
    input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
    input_lengths: <tf.int64>[bs, 1]

  Returns:
    bow_embedding: <tf.float32>[bs, emb_dim]
  """
  max_seq_len = tf.shape(input_embeddings)[1]
  # <tf.float32>[bs, 1, max_seq_len]
  mask = tf.sequence_mask(input_lengths, max_seq_len, dtype=tf.float32)
  # <tf.float32>[bs, 1, emb_dim]
  sum_embedding = tf.matmul(mask, input_embeddings)
  # <tf.float32>[bs, 1, emb_dim]
  avg_embedding = sum_embedding / tf.to_float(tf.expand_dims(input_lengths, 2))
  # <tf.float32>[bs, dim]
  return tf.squeeze(avg_embedding, 1)
Example #11
Source File: neural_assistant.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_last_embedding(input_embeddings, input_lengths, hparams):
  """Computes average of last K embedding.

  Args:
    input_embeddings: <tf.float32>[bs, max_seq_len, emb_dim]
    input_lengths: <tf.int64>[bs, 1]
    hparams: model hparams

  Returns:
    last_k_embedding: <tf.float32>[bs, emb_dim]
  """
  max_seq_len = tf.shape(input_embeddings)[1]
  # <tf.float32>[bs, 1, max_seq_len]
  mask = tf.sequence_mask(input_lengths, max_seq_len, dtype=tf.float32)
  del_mask = tf.sequence_mask(
      input_lengths - hparams.last_k, max_seq_len, dtype=tf.float32)
  final_mask = mask - del_mask
  # <tf.float32>[bs, 1, emb_dim]
  sum_embedding = tf.matmul(final_mask, input_embeddings)
  # <tf.float32>[bs, 1, emb_dim]
  last_k_embedding = sum_embedding / tf.to_float(
      tf.expand_dims(
          tf.ones([tf.shape(input_embeddings)[0], 1]) * hparams.last_k, 2))
  # <tf.float32>[bs, dim]
  return tf.squeeze(last_k_embedding, 1)
Example #12
Source File: vqa_attention.py From tensor2tensor with Apache License 2.0 | 6 votes |
def attn(image_feat, query, hparams, name="attn"):
  """Attention on image feature with question as query."""
  with tf.variable_scope(name, "attn", values=[image_feat, query]):
    attn_dim = hparams.attn_dim
    num_glimps = hparams.num_glimps
    num_channels = common_layers.shape_list(image_feat)[-1]
    if len(common_layers.shape_list(image_feat)) == 4:
      image_feat = common_layers.flatten4d3d(image_feat)
    query = tf.expand_dims(query, 1)
    image_proj = common_attention.compute_attention_component(
        image_feat, attn_dim, name="image_proj")
    query_proj = common_attention.compute_attention_component(
        query, attn_dim, name="query_proj")
    h = tf.nn.relu(image_proj + query_proj)
    h_proj = common_attention.compute_attention_component(
        h, num_glimps, name="h_proj")
    p = tf.nn.softmax(h_proj, axis=1)
    image_ave = tf.matmul(image_feat, p, transpose_a=True)
    image_ave = tf.reshape(image_ave, [-1, num_channels * num_glimps])

    return image_ave
Example #13
Source File: message_passing_attention.py From tensor2tensor with Apache License 2.0 | 6 votes |
def compute_values(edge_compatibility, v):
  """Compute values. If edge compatibilities is just adjacency, we get ggnn.

  Args:
    edge_compatibility: A tensor of shape [batch, num_transforms, length, depth]
    v: A tensor of shape [batch, num_transforms, length, depth]

  Returns:
    output: A [batch, length, depth] tensor
  """
  # Computes the incoming value vectors for each node by weighting them
  # according to the attention weights. These values are still segregated by
  # edge type.
  # Shape = [B, T, N, V].
  all_edge_values = tf.matmul(tf.to_float(edge_compatibility), v)
  # Combines the weighted value vectors together across edge types into a
  # single N x V matrix for each batch.
  output = tf.reduce_sum(all_edge_values, axis=1)  # Shape [B, N, V].
  return output
Example #14
Source File: transformer_nat.py From tensor2tensor with Apache License 2.0 | 6 votes |
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #15
Source File: export_checkpoints.py From albert with Apache License 2.0 | 6 votes |
def get_sentence_order_logits(input_tensor, albert_config):
  """Get loss and log probs for the next sentence prediction."""
  # Simple binary classification. Note that 0 is "next sentence" and 1 is
  # "random sentence". This weight matrix is not used after pre-training.
  with tf.variable_scope("cls/seq_relationship"):
    output_weights = tf.get_variable(
        "output_weights",
        shape=[2, albert_config.hidden_size],
        initializer=modeling.create_initializer(
            albert_config.initializer_range))
    output_bias = tf.get_variable(
        "output_bias", shape=[2], initializer=tf.zeros_initializer())

    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    return logits
Example #16
Source File: export_checkpoints.py From albert with Apache License 2.0 | 6 votes |
def get_mlm_logits(input_tensor, albert_config, mlm_positions, output_weights):
  """From run_pretraining.py."""
  input_tensor = gather_indexes(input_tensor, mlm_positions)
  with tf.variable_scope("cls/predictions"):
    # We apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("transform"):
      input_tensor = tf.layers.dense(
          input_tensor,
          units=albert_config.embedding_size,
          activation=modeling.get_activation(albert_config.hidden_act),
          kernel_initializer=modeling.create_initializer(
              albert_config.initializer_range))
      input_tensor = modeling.layer_norm(input_tensor)

    # The output weights are the same as the input embeddings, but there is
    # an output-only bias for each token.
    output_bias = tf.get_variable(
        "output_bias",
        shape=[albert_config.vocab_size],
        initializer=tf.zeros_initializer())
    logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
  return logits
Example #17
Source File: matmul_source_op_handler_test.py From morph-net with Apache License 2.0 | 6 votes |
def testMatMul2D(self, size):
  inputs = tf.zeros((13, 2))
  handler = matmul_source_op_handler.MatMulSourceOpHandler(0.1)

  kernel = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32)
  x = tf.matmul(inputs, kernel, transpose_b=False, name='MatMul')
  op_slice = orm.OpSlice(x.op, orm.Slice(0, size))

  transpose_kernel = tf.constant([[1, 4], [2, 5], [3, 6]], dtype=tf.float32)
  x_other = tf.matmul(
      inputs, transpose_kernel, transpose_b=True,
      name='MatMulTransposedKernel')
  op_slice_other = orm.OpSlice(x_other.op, orm.Slice(0, size))

  # The regularization vector should not depend on whether the kernel is
  # stored transposed.
  self.assertAllClose(
      handler.create_regularizer(op_slice).regularization_vector,
      handler.create_regularizer(op_slice_other).regularization_vector)
Example #18
Source File: util.py From nni with MIT License | 6 votes |
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    """lstm cell"""
    _, nin = [v.value for v in xs[0].get_shape()]  # the first is nbatch
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
Example #19
Source File: tiled_linear.py From lamb with Apache License 2.0 | 6 votes |
def _build_tiled_linear(self, inputs, input_name_and_sizes,
                        output_name_and_sizes, add_bias):
  # pylint: disable=missing-docstring
  def split_output(output):
    if len(output_name_and_sizes) == 1:
      return output
    elif len(set([size for _, size in output_name_and_sizes])) == 1:
      # This is a bit faster than several tf.slice calls.
      return tf.split(output, len(output_name_and_sizes), axis=1)
    else:
      outputs = []
      offset = 0
      for _, output_size in output_name_and_sizes:
        outputs.append(tf.slice(output, [0, offset], [-1, output_size]))
        offset += output_size
      return outputs

  weights = self._ensure_weights()
  if len(inputs) > 1:
    inputs = tf.concat(inputs, 1)
  if add_bias:
    biases = self._ensure_biases()
    return split_output(tf.nn.xw_plus_b(inputs, weights, biases))
  else:
    return split_output(tf.matmul(inputs, weights))
Example #20
Source File: transformer_memory.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _address_content(self, x):
  """Address the memory based on content similarity.

  Args:
    x: a tensor in the shape of [batch_size, length, depth].

  Returns:
    the logits for each memory entry [batch_size, length, memory_size].
  """
  mem_keys = tf.layers.dense(self.mem_vals, self.key_depth,
                             bias_initializer=tf.constant_initializer(1.0),
                             name="mem_key")
  mem_query = tf.layers.dense(x, self.key_depth,
                              bias_initializer=tf.constant_initializer(1.0),
                              name="mem_query")
  norm = tf.matmul(self._norm(mem_query), self._norm(mem_keys),
                   transpose_b=True)
  dot_product = tf.matmul(mem_query, mem_keys, transpose_b=True)
  cos_dist = tf.div(dot_product, norm + 1e-7, name="cos_dist")
  access_logits = self.sharpen_factor * cos_dist
  return access_logits
Example #21
Source File: fastlin.py From interval-bound-propagation with Apache License 2.0 | 5 votes |
def apply_linear(self, wrapper, w, b):
  # Propagate the symbolic bounds without the bias term; the bias only
  # shifts the nominal output, not the offsets around it.
  bounds_out = super(RelativeSymbolicBounds, self).apply_linear(
      wrapper, w, b=None)

  nominal_out = tf.matmul(self._nominal, w)
  if b is not None:
    nominal_out += b

  return RelativeSymbolicBounds(
      bounds_out.lower, bounds_out.upper, nominal_out).with_priors(
          wrapper.output_bounds)
Example #22
Source File: modeling.py From albert with Apache License 2.0 | 5 votes |
def dense_layer_2d(input_tensor,
                   output_size,
                   initializer,
                   activation,
                   use_einsum,
                   num_attention_heads=1,
                   name=None):
  """A dense layer with 2D kernel.

  Args:
    input_tensor: Float tensor with rank 3.
    output_size: The size of output dimension.
    initializer: Kernel initializer.
    activation: Activation function.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.
    num_attention_heads: number of attention head in attention layer.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """
  del num_attention_heads  # unused
  input_shape = get_shape_list(input_tensor)
  hidden_size = input_shape[2]
  with tf.variable_scope(name):
    w = tf.get_variable(
        name="kernel",
        shape=[hidden_size, output_size],
        initializer=initializer)
    b = tf.get_variable(
        name="bias", shape=[output_size], initializer=tf.zeros_initializer)
    if use_einsum:
      ret = tf.einsum("BFH,HO->BFO", input_tensor, w)
    else:
      ret = tf.matmul(input_tensor, w)
  ret += b
  if activation is not None:
    return activation(ret)
  else:
    return ret
Example #23
Source File: model.py From gpt2-estimator with MIT License | 5 votes |
def conv1d(x, scope, nf, *, w_init_stdev=0.02):
    # A kernel-size-1 "convolution": a position-wise linear projection
    # implemented as a reshape followed by a matmul.
    with tf.variable_scope(scope):
        *start, nx = shape_list(x)
        w = tf.get_variable(
            'w', [1, nx, nf],
            initializer=tf.random_normal_initializer(stddev=w_init_stdev))
        b = tf.get_variable('b', [nf], initializer=tf.constant_initializer(0))
        c = tf.reshape(
            tf.matmul(tf.reshape(x, [-1, nx]), tf.reshape(w, [-1, nf])) + b,
            start + [nf])
        return c
Example #24
Source File: snail_test.py From tensor2robot with Apache License 2.0 | 5 votes |
def test_CausallyMaskedSoftmax(self):
  num_rows = 5
  x = tf.random.normal((num_rows, 3))
  logits = tf.matmul(x, tf.linalg.transpose(x))
  y = snail.CausallyMaskedSoftmax(logits)
  with self.test_session() as sess:
    y_ = sess.run(y)
    idx = np.triu_indices(num_rows, 1)
    np.testing.assert_array_equal(y_[idx], 0.)
    # Testing that each row sums to 1.
    for i in range(num_rows):
      np.testing.assert_almost_equal(np.sum(y_[i, :]), 1.0)
Example #25
Source File: snail.py From tensor2robot with Apache License 2.0 | 5 votes |
def AttentionBlock(x, key_size, value_size, scope=""):
  """Self-attention key-value lookup, styled after Vaswani et al. '17.

  query and key are of shape [T, K]. query * transpose(key) yields logits of
  shape [T, T]. logits[i, j] corresponds to unnormalized attention vector over
  values [T, V] for each timestep i. Because this attention is over a set of
  temporal values, we causally mask the pre-softmax logits[i, j] := 0, for all
  j > i.

  Citations:
    Vaswani et al. '17: Attention is All you need
      https://arxiv.org/abs/1706.03762.

  Args:
    x: Input tensor of shape [batch, sequence_length, channels].
    key_size: Integer key dimensionality.
    value_size: Integer value dimensionality.
    scope: Variable scope for this layer.

  Returns:
    result: Tensor of shape [batch, sequence_length, channels + value_size]
    end_points: Dictionary of intermediate values (e.g. debugging).
  """
  end_points = {}
  with tf.variable_scope(scope):
    key = layers.fully_connected(x, key_size, activation_fn=None)  # [T, K]
    query = layers.fully_connected(x, key_size, activation_fn=None)  # [T, K]
    logits = tf.matmul(query, key, transpose_b=True)  # [T, T]
    # Useful for visualizing attention alignment matrices.
    probs = CausallyMaskedSoftmax(logits / np.sqrt(key_size))  # [T, T]
    end_points["attn_prob"] = probs
    values = layers.fully_connected(x, value_size, activation_fn=None)  # [T, V]
    read = tf.matmul(probs, values)  # [T, V]
    result = tf.concat([x, read], axis=2)  # [T, K + V]
    return result, end_points
Example #26
Source File: rnn.py From magenta with Apache License 2.0 | 5 votes |
def super_linear(x,
                 output_size,
                 scope=None,
                 reuse=False,
                 init_w='ortho',
                 weight_start=0.0,
                 use_bias=True,
                 bias_start=0.0,
                 input_size=None):
  """Performs linear operation. Uses ortho init defined earlier."""
  shape = x.get_shape().as_list()
  with tf.variable_scope(scope or 'linear'):
    if reuse:
      tf.get_variable_scope().reuse_variables()

    w_init = None  # uniform
    if input_size is None:
      x_size = shape[1]
    else:
      x_size = input_size
    if init_w == 'zeros':
      w_init = tf.constant_initializer(0.0)
    elif init_w == 'constant':
      w_init = tf.constant_initializer(weight_start)
    elif init_w == 'gaussian':
      w_init = tf.random_normal_initializer(stddev=weight_start)
    elif init_w == 'ortho':
      w_init = lstm_ortho_initializer(1.0)

    w = tf.get_variable(
        'super_linear_w', [x_size, output_size], tf.float32,
        initializer=w_init)
    if use_bias:
      b = tf.get_variable(
          'super_linear_b', [output_size], tf.float32,
          initializer=tf.constant_initializer(bias_start))
      return tf.matmul(x, w) + b
    return tf.matmul(x, w)
Example #27
Source File: rnn.py From magenta with Apache License 2.0 | 5 votes |
def __call__(self, x, state, scope=None):
  with tf.variable_scope(scope or type(self).__name__):
    c, h = tf.split(state, 2, 1)

    x_size = x.get_shape().as_list()[1]

    w_init = None  # uniform
    h_init = lstm_ortho_initializer(1.0)

    # Keep W_xh and W_hh separate here as well to use different init methods.
    w_xh = tf.get_variable(
        'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
    w_hh = tf.get_variable(
        'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)
    bias = tf.get_variable(
        'bias', [4 * self.num_units],
        initializer=tf.constant_initializer(0.0))

    concat = tf.concat([x, h], 1)
    w_full = tf.concat([w_xh, w_hh], 0)
    hidden = tf.matmul(concat, w_full) + bias

    i, j, f, o = tf.split(hidden, 4, 1)

    if self.use_recurrent_dropout:
      g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
    else:
      g = tf.tanh(j)

    new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
    new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat([new_c, new_h], 1)  # fuk tuples.
Example #28
Source File: learning.py From magenta with Apache License 2.0 | 5 votes |
def gram_matrix(feature_maps):
  """Computes the Gram matrix for a set of feature maps."""
  batch_size, height, width, channels = tf.unstack(tf.shape(feature_maps))
  denominator = tf.to_float(height * width)
  feature_maps = tf.reshape(
      feature_maps, tf.stack([batch_size, height * width, channels]))
  matrix = tf.matmul(feature_maps, feature_maps, adjoint_a=True)
  return matrix / denominator
Example #29
Source File: models.py From graphics with Apache License 2.0 | 5 votes |
def _compute_sdf(self, x, translations, blend_terms, points):
  """Compute signed distances between query points and hyperplanes."""
  n_parts = tf.shape(x)[1]
  n_planes = tf.shape(x)[2]
  norm_logit = x[..., 0:self._dims - 1]
  offset = (-(tf.nn.sigmoid(x[..., self._dims - 1:self._dims]) *
              self._offset_scale + self._offset_lbound))
  blend_planes = (
      tf.nn.sigmoid(blend_terms[..., :n_parts]) * self._blend_scale +
      self._blend_lbound)

  # Norm of the boundary line
  norm_rad = tf.tanh(norm_logit) * np.pi  # [..., (azimuth, altitude)]
  if self._dims == 3:
    norm = tf.stack([
        tf.sin(norm_rad[..., 1]) * tf.cos(norm_rad[..., 0]),
        tf.sin(norm_rad[..., 1]) * tf.sin(norm_rad[..., 0]),
        tf.cos(norm_rad[..., 1])
    ], axis=-1)
  else:
    norm = tf.concat([tf.cos(norm_rad), tf.sin(norm_rad)], axis=-1)

  # Calculate signed distances to hyperplanes.
  points = (
      tf.expand_dims(points, axis=1) - tf.expand_dims(translations, axis=2))
  points = tf.expand_dims(points, axis=2)
  points = tf.tile(points, [1, 1, n_planes, 1, 1])
  signed_dis = tf.matmul(points, tf.expand_dims(norm, axis=-1))
  signed_dis = signed_dis + tf.expand_dims(offset, axis=-2)

  return signed_dis, translations, blend_planes, offset
Example #30
Source File: modeling.py From albert with Apache License 2.0 | 5 votes |
def dense_layer_3d_proj(input_tensor,
                        hidden_size,
                        head_size,
                        initializer,
                        activation,
                        use_einsum,
                        name=None):
  """A dense layer with 3D kernel for projection.

  Args:
    input_tensor: float Tensor of shape [batch, from_seq_length,
      num_attention_heads, size_per_head].
    hidden_size: The size of hidden layer.
    head_size: The size of head.
    initializer: Kernel initializer.
    activation: Activation function.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """
  input_shape = get_shape_list(input_tensor)
  num_attention_heads = input_shape[2]
  with tf.variable_scope(name):
    w = tf.get_variable(
        name="kernel",
        shape=[num_attention_heads * head_size, hidden_size],
        initializer=initializer)
    w = tf.reshape(w, [num_attention_heads, head_size, hidden_size])
    b = tf.get_variable(
        name="bias", shape=[hidden_size], initializer=tf.zeros_initializer)
    if use_einsum:
      ret = tf.einsum("BFND,NDH->BFH", input_tensor, w)
    else:
      ret = einsum_via_matmul(input_tensor, w, 2)
  ret += b
  if activation is not None:
    return activation(ret)
  else:
    return ret