Python tensorflow.eye() Examples

The following are 30 code examples of tensorflow.eye(), drawn from open-source projects. Each example notes its original project, source file, and license. You may also want to check out the other available functions and classes of the tensorflow module.
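As a quick refresher before the examples (a minimal sketch of the public API, not taken from any of the projects below), tf.eye builds an identity matrix and optionally accepts num_columns, batch_shape, and dtype:

import tensorflow as tf

eye3 = tf.eye(3)                          # [3, 3] float32 identity
eye_rect = tf.eye(2, num_columns=4)       # [2, 4] rectangular "identity"
eye_batch = tf.eye(3, batch_shape=[5])    # [5, 3, 3] batched identity
eye_int = tf.eye(3, dtype=tf.int32)       # integer identity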
Example #1
Source File: fisher_factors.py    From kfac with Apache License 2.0
def get_matpower(self, exp, damping_func):
    # Note that this function returns a variable which gets updated by the
    # inverse ops.  It may be stale / inconsistent with the latest value of
    # self.cov (except when exp == 1).
    if exp != 1:
      damping_id = graph_func_to_id(damping_func)
      matpower = self._matpower_by_exp_and_damping[(exp, damping_id)]
    else:
      cov = self.cov
      identity = tf.eye(cov.shape.as_list()[0], dtype=cov.dtype)
      matpower = cov + tf.cast(damping_func(), dtype=self.cov.dtype)*identity

    assert matpower.shape.ndims == 2
    return lo.LinearOperatorFullMatrix(matpower,
                                       is_non_singular=True,
                                       is_self_adjoint=True,
                                       is_positive_definite=True,
                                       is_square=True) 
Example #2
Source File: model.py    From PathCon with MIT License
def _build_relation_feature(self):
        if self.feature_type == 'id':
            self.relation_dim = self.n_relations
            self.relation_features = tf.eye(self.n_relations, dtype=tf.float64)
        elif self.feature_type == 'bow':
            bow = np.load('../data/' + self.dataset + '/bow.npy')
            self.relation_dim = bow.shape[1]
            self.relation_features = tf.constant(bow, tf.float64)
        elif self.feature_type == 'bert':
            bert = np.load('../data/' + self.dataset + '/bert.npy')
            self.relation_dim = bert.shape[1]
            self.relation_features = tf.constant(bert, tf.float64)

        # the feature of the last relation (the null relation) is a zero vector
        self.relation_features = tf.concat([self.relation_features, tf.zeros([1, self.relation_dim], tf.float64)],
                                           axis=0, name='relation_features') 
Example #3
Source File: slicenet.py    From fine-lm with MIT License
def rank_loss(sentence_emb, image_emb, margin=0.2):
  """Experimental rank loss, thanks to kkurach@ for the code."""
  with tf.name_scope("rank_loss"):
    # Normalize first as this is assumed in cosine similarity later.
    sentence_emb = tf.nn.l2_normalize(sentence_emb, 1)
    image_emb = tf.nn.l2_normalize(image_emb, 1)
    # Both sentence_emb and image_emb have size [batch, depth].
    scores = tf.matmul(image_emb, tf.transpose(sentence_emb))  # [batch, batch]
    diagonal = tf.diag_part(scores)  # [batch]
    cost_s = tf.maximum(0.0, margin - diagonal + scores)  # [batch, batch]
    cost_im = tf.maximum(
        0.0, margin - tf.reshape(diagonal, [-1, 1]) + scores)  # [batch, batch]
    # Clear diagonals.
    batch_size = tf.shape(sentence_emb)[0]
    empty_diagonal_mat = tf.ones_like(cost_s) - tf.eye(batch_size)
    cost_s *= empty_diagonal_mat
    cost_im *= empty_diagonal_mat
    return tf.reduce_mean(cost_s) + tf.reduce_mean(cost_im) 
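A hedged usage sketch (shapes and names here are illustrative, not part of the fine-lm project): rank_loss expects two [batch, depth] embedding tensors, which it L2-normalizes internally, and returns a scalar loss.

import tensorflow as tf

sentence_emb = tf.random_normal([8, 128])   # hypothetical sentence embeddings
image_emb = tf.random_normal([8, 128])      # hypothetical image embeddings
loss = rank_loss(sentence_emb, image_emb, margin=0.2)  # scalar tensor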
Example #4
Source File: common_attention.py    From fine-lm with MIT License
def gather_indices_2d(x, block_shape, block_stride):
  """Getting gather indices."""
  # making an identity matrix kernel
  kernel = tf.eye(block_shape[0] * block_shape[1])
  kernel = reshape_range(kernel, 0, 1, [block_shape[0], block_shape[1], 1])
  # making indices [1, h, w, 1] to apply convs
  x_shape = common_layers.shape_list(x)
  indices = tf.range(x_shape[2] * x_shape[3])
  indices = tf.reshape(indices, [1, x_shape[2], x_shape[3], 1])
  indices = tf.nn.conv2d(
      tf.cast(indices, tf.float32),
      kernel,
      strides=[1, block_stride[0], block_stride[1], 1],
      padding="VALID")
  # making indices [num_blocks, dim] to gather
  dims = common_layers.shape_list(indices)[:3]
  if all([isinstance(dim, int) for dim in dims]):
    num_blocks = functools.reduce(operator.mul, dims, 1)
  else:
    num_blocks = tf.reduce_prod(dims)
  indices = tf.reshape(indices, [num_blocks, -1])
  return tf.cast(indices, tf.int32) 
Example #5
Source File: layers.py    From deepchem with MIT License
def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function """
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
    Rs = tf.cast(np.reshape(Rs, (1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(ita, (1, 1, 1, -1)), tf.float32)
    length = ita.get_shape().as_list()[-1]

    d_cutoff = tf.stack([d_cutoff] * length, axis=3)
    d = tf.stack([d] * length, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
      out_tensors = []
      for atom_type in self.atom_cases:
        selected_atoms = tf.expand_dims(
            tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out, axis=2) 
Example #6
Source File: transformers.py    From deepchem with MIT License
def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function """
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
    Rs = tf.cast(np.reshape(Rs, (1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(ita, (1, 1, 1, -1)), tf.float32)
    length = ita.get_shape().as_list()[-1]

    d_cutoff = tf.stack([d_cutoff] * length, axis=3)
    d = tf.stack([d] * length, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
      out_tensors = []
      for atom_type in self.atom_cases:
        selected_atoms = tf.expand_dims(
            tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out, axis=2) 
Example #7
Source File: model.py    From minimal-entropy-correlation-alignment with MIT License
def log_coral_loss(self, h_src, h_trg, gamma=1e-3):
	# regularized covariances result in inf or nan
	# First: subtract the mean from the data matrix
	batch_size = tf.to_float(tf.shape(h_src)[0])
	h_src = h_src - tf.reduce_mean(h_src, axis=0) 
	h_trg = h_trg - tf.reduce_mean(h_trg, axis=0 )
	cov_source = (1./(batch_size-1)) * tf.matmul( h_src, h_src, transpose_a=True) #+ gamma * tf.eye(self.hidden_repr_size)
	cov_target = (1./(batch_size-1)) * tf.matmul( h_trg, h_trg, transpose_a=True) #+ gamma * tf.eye(self.hidden_repr_size)
	#eigen decomposition
	eig_source  = tf.self_adjoint_eig(cov_source)
	eig_target  = tf.self_adjoint_eig(cov_target)
	log_cov_source = tf.matmul( eig_source[1] ,  tf.matmul(tf.diag( tf.log(eig_source[0]) ), eig_source[1], transpose_b=True) )
	log_cov_target = tf.matmul( eig_target[1] ,  tf.matmul(tf.diag( tf.log(eig_target[0]) ), eig_target[1], transpose_b=True) )

	# Returns the Frobenius norm
	return tf.reduce_mean(tf.square( tf.subtract(log_cov_source,log_cov_target))) 
	#~ return tf.reduce_mean(tf.reduce_max(eig_target[0]))
	#~ return tf.to_float(tf.equal(tf.count_nonzero(h_src), tf.count_nonzero(h_src))) 
Example #8
Source File: utils.py    From Tensorflow-Cookbook with MIT License
def orthogonal_regularizer(scale) :
    """ Defining the Orthogonal regularizer and return the function at last to be used in Conv layer as kernel regularizer"""

    def ortho_reg(w) :
        """ Reshaping the matrxi in to 2D tensor for enforcing orthogonality"""
        _, _, _, c = w.get_shape().as_list()

        w = tf.reshape(w, [-1, c])

        """ Declaring a Identity Tensor of appropriate size"""
        identity = tf.eye(c)

        """ Regularizer Wt*W - I """
        w_transpose = tf.transpose(w)
        w_mul = tf.matmul(w_transpose, w)
        reg = tf.subtract(w_mul, identity)

        """Calculating the Loss Obtained"""
        ortho_loss = tf.nn.l2_loss(reg)

        return scale * ortho_loss

    return ortho_reg 
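One possible way to wire this up (a sketch; the Tensorflow-Cookbook repo may wrap its layers differently): pass the returned function as kernel_regularizer so the framework collects scale * ||W^T W - I||^2 as a regularization loss.

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 32, 32, 3])   # hypothetical image batch
ortho_reg = orthogonal_regularizer(1e-4)                  # scale value is illustrative
x = tf.layers.conv2d(inputs, filters=64, kernel_size=3,
                     kernel_regularizer=ortho_reg)
reg_losses = tf.losses.get_regularization_losses()        # per-layer penalties to add to the training loss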
Example #9
Source File: utils.py    From Tensorflow-Cookbook with MIT License
def orthogonal_regularizer_fully(scale) :
    """ Defining the Orthogonal regularizer and return the function at last to be used in Fully Connected Layer """

    def ortho_reg_fully(w) :
        """ Reshaping the matrix in to 2D tensor for enforcing orthogonality"""
        _, c = w.get_shape().as_list()

        """Declaring a Identity Tensor of appropriate size"""
        identity = tf.eye(c)
        w_transpose = tf.transpose(w)
        w_mul = tf.matmul(w_transpose, w)
        reg = tf.subtract(w_mul, identity)

        """ Calculating the Loss """
        ortho_loss = tf.nn.l2_loss(reg)

        return scale * ortho_loss

    return ortho_reg_fully 
Example #10
Source File: batch_lbs.py    From tf_smpl with MIT License
def batch_rodrigues(theta, name=None):
    """
    Theta is N x 3
    """
    with tf.variable_scope(name, "batch_rodrigues", [theta]):
        batch_size = tf.shape(theta)[0]

        angle = tf.expand_dims(tf.norm(theta + 1e-8, axis=1), -1)
        r = tf.expand_dims(tf.div(theta, angle), -1)

        angle = tf.expand_dims(angle, -1)
        cos = tf.cos(angle)
        sin = tf.sin(angle)

        outer = tf.matmul(r, r, transpose_b=True, name="outer")

        eyes = tf.tile(tf.expand_dims(tf.eye(3), 0), [batch_size, 1, 1])
        R = cos * eyes + (1 - cos) * outer + sin * batch_skew(
            r, batch_size=batch_size)
        return R 
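For reference, the last few lines implement the Rodrigues rotation formula, batched over N axis-angle vectors (the formula is standard, not specific to tf_smpl):

R = cos(theta) * I + (1 - cos(theta)) * r r^T + sin(theta) * [r]_x

where r is the unit rotation axis, theta its norm, and [r]_x the skew-symmetric cross-product matrix built by batch_skew.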
Example #11
Source File: batch_lbs.py    From tf_smpl with MIT License
def batch_lrotmin(theta, name=None):
    """ NOTE: not used bc I want to reuse R and this is simple.
    Output of this is used to compute joint-to-pose blend shape mapping.
    Equation 9 in SMPL paper.


    Args:
      theta: `Tensor`, N x 72 vector holding the axis-angle rep of K joints.
            This includes the global rotation, so K=24.

    Returns:
      diff_vec: `Tensor`, N x 207, the flattened rotation matrices of the 23=(K-1) joints with the identity subtracted.
    """
    with tf.variable_scope(name, "batch_lrotmin", [theta]):
        with tf.variable_scope("ignore_global"):
            theta = theta[:, 3:]

        # N*23 x 3 x 3
        Rs = batch_rodrigues(tf.reshape(theta, [-1, 3]))
        lrotmin = tf.reshape(Rs - tf.eye(3), [-1, 207])

        return lrotmin 
Example #12
Source File: layers.py    From PADME with MIT License
def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function """
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
    Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, -1)))
    ita = tf.to_float(np.reshape(ita, (1, 1, 1, -1)))
    length = ita.get_shape().as_list()[-1]

    d_cutoff = tf.stack([d_cutoff] * length, axis=3)
    d = tf.stack([d] * length, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
      out_tensors = []
      for atom_type in self.atom_cases:
        selected_atoms = tf.expand_dims(
            tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out, axis=2) 
Example #13
Source File: transformers.py    From PADME with MIT License
def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function """
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
    Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, -1)))
    ita = tf.to_float(np.reshape(ita, (1, 1, 1, -1)))
    length = ita.get_shape().as_list()[-1]

    d_cutoff = tf.stack([d_cutoff] * length, axis=3)
    d = tf.stack([d] * length, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
      out_tensors = []
      for atom_type in self.atom_cases:
        selected_atoms = tf.expand_dims(
            tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out, axis=2) 
Example #14
Source File: attention_test.py    From shortest-path with The Unlicense
def test_softmax_masking(self):

        max_len = 3
        axis = 1
        logits = tf.eye(max_len)
        seq_len = [1,2,2]
        mask = tf.sequence_mask(seq_len, max_len)

        r = softmax_with_masking(logits, mask, axis)
        r = np.array(r)

        d = math.exp(1) + math.exp(0)

        expected = np.array([
            [1,0,0],
            [math.exp(0)/d, math.exp(1)/d,0],
            [0.5, 0.5, 0],
        ])

        np.testing.assert_almost_equal(r, expected) 
Example #15
Source File: decode.py    From shortest-path with The Unlicense
def execute_reasoning(args, features, **kwargs):

	d_eye = tf.eye(args["max_decode_iterations"])

	iteration_id = [
		tf.tile(tf.expand_dims(d_eye[i], 0), [features["d_batch_size"], 1])
		for i in range(args["max_decode_iterations"])
	]

	inputs = [iteration_id]

	final_output, out_taps = static_decode(args, features, inputs, **kwargs)


	final_output = dynamic_assert_shape(final_output, [features["d_batch_size"], args["output_width"]])


	return final_output, out_taps 
Example #16
Source File: layer_collection_test.py    From kfac with Apache License 2.0
def testLossFunctionByName(self):
    """Ensure loss functions can be identified by name."""
    with tf.Graph().as_default():
      logits = tf.eye(2)
      lc = layer_collection.LayerCollection()

      # Create a new loss function by name.
      lc.register_categorical_predictive_distribution(logits, name='loss1')
      self.assertEqual(1, len(lc.towers_by_loss))

      # Add logits to same loss function.
      lc.register_categorical_predictive_distribution(
          logits, name='loss1', reuse=True)
      self.assertEqual(1, len(lc.towers_by_loss))

      # Add another new loss function.
      lc.register_categorical_predictive_distribution(logits, name='loss2')
      self.assertEqual(2, len(lc.towers_by_loss)) 
Example #17
Source File: models.py    From anica with MIT License
def compute_jacobian(self, o):
        # 90s style manual gradient computations
        x, entrance_h, exit_h, y = self.forward(o)
        entrance_h_prime = tf.matmul(tf.eye(self.input_dim),
                                     self.entrance_w_in)
        # entrance_h_prime is now (input_dim, input_dim*hidden_dim)
        entrance_h_prime = tf.expand_dims(entrance_h_prime, 0)
        entrance_h_prime *= tf.expand_dims(1 - entrance_h**2, 1)
        # entrance_h_prime should be (batch, input_dim, input_dim*hidden_dim)
        entrance_o_prime = tf.einsum('aij,jk->aik', entrance_h_prime,
                self.entrance_w_out) 
        # we're at (batch, input_dim, input_dim)
        y_prime = tf.einsum('aij,jk->aik',
                            entrance_o_prime,
                            self.F.layers[0][0])
        # still at (batch, input_dim, input_dim)
        exit_h_prime = tf.einsum('aij,jk->aik',
                                 y_prime,
                                 self.exit_w_in) 
        # (batch, input_dim, input_dim*hidden_dim)
        # exit_h should be (batch, input_dim*hidden_dim)
        exit_h_prime = exit_h_prime * tf.expand_dims(1 - exit_h**2, 1)
        w_out = self.exit_w_out / tf.sqrt(tf.reduce_sum(self.exit_w_out**2, 0,
                                                   keep_dims=True))
        w_out = w_out / tf.sqrt(tf.cast(self.hidden_dim, 'float32'))
        # J should be (batch, input_dim, input_dim)
        J = tf.einsum('aij,jk->aik', exit_h_prime, w_out) 
        return J 
Example #18
Source File: models.py    From anica with MIT License
def compute_jacobian(self, o):
        x, h, _ = self.forward(o) # h is (batch, input_dim*hidden_dim)
        y_prime = self.F(tf.eye(self.input_dim), add_bias=False)
        h_prime = tf.matmul(y_prime, self.w_in)
        # h_prime is now (input_dim, input_dim*hidden_dim)
        h_prime = tf.expand_dims(h_prime, 0)
        h_prime = h_prime * tf.expand_dims(1 - h**2, 1)
        # h_prime should now be (batch, input_dim, input_dim*hidden_dim)
        w_out = self.w_out / tf.sqrt(tf.reduce_sum(self.w_out**2, 0,
                                                   keep_dims=True))
        w_out = w_out / tf.sqrt(tf.cast(self.hidden_dim, 'float32'))
        # J should be (batch, input_dim, input_dim)
        # FIXME: einsum only seems to work when the batch dimension is known.
        J = tf.einsum('aij,jk->aik', h_prime, w_out) 
        return J 
Example #19
Source File: models.py    From HSLN-Joint-Sentence-Classification with MIT License
def add_loss_op(self, logits, logits_no_dropout, labels, document_lengths):
        """Defines the loss"""
        if self.config.use_crf:
            log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
                    logits, labels, document_lengths)
            self.trans_params = trans_params # need to evaluate it for decoding
            loss = tf.reduce_mean(-log_likelihood)
        else:
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=labels)
            mask = tf.sequence_mask(document_lengths)
            losses = tf.boolean_mask(losses, mask)
            loss = tf.reduce_mean(losses)

        # add l2 regularization
        l2 = self.config.l2_reg_lambda * sum([
            tf.nn.l2_loss(tf_var)
            for tf_var in tf.trainable_variables()
            if not ("noreg" in tf_var.name or "bias" in tf_var.name)])
        loss += l2

        # add dropout loss
        if logits_no_dropout is not None:
            self.drop_loss = tf.nn.l2_loss(tf.subtract(logits, logits_no_dropout))
            loss += self.config.drop_penalty * self.drop_loss

        # add attention matrix penalty
        if self.config.attention_hop > 1:
            A_T = tf.transpose(self.A, perm=[0, 2, 1])
            self.attention_loss = self.Frobenius(tf.einsum('aij,ajk->aik', self.A, A_T) - \
                tf.eye(self.config.attention_hop, batch_shape=[tf.shape(self.A)[0]]))
            loss += self.config.attention_penalty * self.attention_loss

        # for tensorboard
        tf.summary.scalar("loss", loss)

        return loss 
Example #20
Source File: utils_test.py    From kfac with Apache License 2.0
def testPosDefInvMatrixInverse(self):
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      np.random.seed(0)
      square = lambda x: np.dot(x, x.T)

      size = 3
      x = square(np.random.randn(size, size))
      damp = 0.1
      identity = tf.eye(size, dtype=tf.float64)

      tf_inv = utils.posdef_inv_matrix_inverse(tf.constant(x), identity, damp)
      np_inv = np.linalg.inv(x + damp * np.eye(size))
      self.assertAllClose(sess.run(tf_inv), np_inv) 
Example #21
Source File: attention_test.py    From shortest-path with The Unlicense
def test_softmax_write(self):

        max_len = 6
        keys = tf.expand_dims(tf.eye(max_len), 0)
        target = 3
        batch_len = 1

        table, focus, taps = attention_write_by_key(keys, keys[:,target,:], tf.ones([batch_len, max_len]))

        d = math.exp(1) + (max_len-1) * math.exp(0)
        exp = np.full([batch_len, max_len, max_len], 1/d)
        exp[:,target,:] = (d-5)/d

        np.set_printoptions(threshold=np.inf)
        np.testing.assert_almost_equal(table.numpy(), exp) 
Example #22
Source File: spherenet_linear_sphereconv_wsoftmax.py    From SphereNet with MIT License
def _add_orthogonal_constraint(self, filt, n_filt):
        
        filt = tf.reshape(filt, [-1, n_filt])
        inner_pro = tf.matmul(tf.transpose(filt), filt)

        loss = 2e-4*tf.nn.l2_loss(inner_pro-tf.eye(n_filt))
        tf.add_to_collection('orth_constraint', loss) 
Example #23
Source File: pyramid.py    From graphics with Apache License 2.0
def _binomial_kernel(num_channels, dtype=tf.float32):
  """Creates a 5x5 binomial kernel.

  Args:
    num_channels: The number of channels of the image to filter.
    dtype: The type of an element in the kernel.

  Returns:
    A tensor of shape `[5, 5, num_channels, num_channels]`.
  """
  kernel = np.array((1., 4., 6., 4., 1.), dtype=dtype.as_numpy_dtype)
  kernel = np.outer(kernel, kernel)
  kernel /= np.sum(kernel)
  kernel = kernel[:, :, np.newaxis, np.newaxis]
  return tf.constant(kernel, dtype=dtype) * tf.eye(num_channels, dtype=dtype) 
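A hedged usage sketch (the graphics library exposes its own pyramid ops; this only illustrates how the [5, 5, num_channels, num_channels] kernel fits tf.nn.conv2d for the classic blur-then-subsample step):

import tensorflow as tf

image = tf.placeholder(tf.float32, [None, 64, 64, 3])    # hypothetical RGB batch
kernel = _binomial_kernel(num_channels=3)                 # [5, 5, 3, 3]
blurred = tf.nn.conv2d(image, kernel, strides=[1, 1, 1, 1], padding="SAME")
downsampled = blurred[:, ::2, ::2, :]                     # keep every other pixel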
Example #24
Source File: func.py    From zero with BSD 3-Clause "New" or "Revised" License
def attention_bias(inputs, mode, inf=None, name=None):
    """ A bias tensor used in attention mechanism"""

    if inf is None:
        inf = dtype.inf()

    with tf.name_scope(name, default_name="attention_bias", values=[inputs]):
        if mode == "causal":
            length = inputs
            lower_triangle = tf.matrix_band_part(
                tf.ones([length, length]), -1, 0
            )
            ret = dtype.tf_to_float(- inf * (1.0 - lower_triangle))
            return tf.reshape(ret, [1, 1, length, length])
        elif mode == "masking":
            mask = inputs
            ret = (1.0 - mask) * - inf
            return tf.expand_dims(tf.expand_dims(ret, 1), 1)
        elif mode == "aan":
            length = tf.shape(inputs)[1]
            diagonal = tf.eye(length)
            cum_factor = tf.expand_dims(tf.cumsum(diagonal, axis=0), 0)
            mask = tf.expand_dims(inputs, 1) * tf.expand_dims(inputs, 2)
            mask *= dtype.tf_to_float(cum_factor)
            weight = tf.nn.softmax(mask + (1.0 - mask) * - inf)
            weight *= mask
            return weight
        else:
            raise ValueError("Unknown mode %s" % mode) 
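For intuition, the "causal" branch is equivalent to the following standalone snippet (an independent sketch, not code from the zero repo): keep the lower triangle and turn every disallowed position into a large negative bias that softmax will effectively zero out.

import tensorflow as tf

length = 5
lower_triangle = tf.matrix_band_part(tf.ones([length, length]), -1, 0)  # lower-triangular ones
bias = -1e9 * (1.0 - lower_triangle)             # large negative bias above the diagonal
bias = tf.reshape(bias, [1, 1, length, length])  # broadcastable over batch and heads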
Example #25
Source File: embed_cpc_task_v1.py    From BERT with Apache License 2.0
def get_labels_of_similarity(query_input_ids, anchor_query_ids):
	idxs_1 = tf.expand_dims(query_input_ids, axis=1) # batch 1 seq
	idxs_2 = tf.expand_dims(anchor_query_ids, axis=0) # 1 batch seq
	# batch x batch x seq
	labels = tf.cast(tf.not_equal(idxs_1, idxs_2), tf.float32) # not equal:1, equal:0
	equal_num = tf.reduce_sum(labels, axis=-1) # [batch, batch]
	not_equal_label = tf.cast(tf.not_equal(equal_num, 0), tf.float32)
	equal_label = tf.cast(tf.equal(equal_num, 0), tf.float32)
	not_equal_label_shape = bert_utils.get_shape_list(not_equal_label, expected_rank=[2,3])
	not_equal_label *= tf.cast(1 - tf.eye(not_equal_label_shape[0]), tf.float32) 
	return not_equal_label, equal_label 
Example #26
Source File: model_fn_crf.py    From BERT with Apache License 2.0
def zero_transition(shape):
	transition = tf.zeros((shape[1], shape[1]))
	transition = transition - tf.eye(shape[1])*NAN
	return tf.cast(transition, tf.float32) 
Example #27
Source File: model_fn.py    From BERT with Apache License 2.0
def get_labels_of_similarity(query_input_ids, anchor_query_ids):
	idxs_1 = tf.expand_dims(query_input_ids, axis=1) # batch 1 seq
	idxs_2 = tf.expand_dims(anchor_query_ids, axis=0) # 1 batch seq
	# batch x batch x seq
	labels = tf.cast(tf.not_equal(idxs_1, idxs_2), tf.float32) # not equal:1, equal:0
	equal_num = tf.reduce_sum(labels, axis=-1) # [batch, batch]
	not_equal_label = tf.cast(tf.not_equal(equal_num, 0), tf.float32)
	not_equal_label_shape = bert_utils.get_shape_list(not_equal_label, expected_rank=[2,3])
	not_equal_label *= tf.cast(1 - tf.eye(not_equal_label_shape[0]), tf.float32) 
	equal_label = (1 - not_equal_label) - tf.eye(not_equal_label_shape[0])
	return equal_label, not_equal_label 
Example #28
Source File: cpc_utils.py    From BERT with Apache License 2.0
def WPC_Hidden(student_tensor, teacher_tensor, input_mask, opt=None):
	teacher_shape = bert_utils.get_shape_list(teacher_tensor[0], expected_rank=[3])
	student_shape = bert_utils.get_shape_list(student_tensor[0], expected_rank=[3])

	with tf.variable_scope("wpc_weights", reuse=tf.AUTO_REUSE): 
		cpc_weights = tf.get_variable(
				"weights", [student_shape[-1],teacher_shape[-1]],
				initializer=create_initializer(0.02)
				)

	flipped_student_tensor = flip_gradient(student_tensor[-1])
	flipped_teacher_tensor = flip_gradient(teacher_tensor[-1])

	# batch x seq x t_hidden
	student_tensor_proj = tf.einsum("abc,cd->abd", flipped_student_tensor, cpc_weights)
	# batch x seq x t_hidden and batch x seq x t_hidden
	# log exp(zt x W x ct)
	# batch x batch x seq
	cpc_tensor = tf.einsum("abd,cbd->acb", student_tensor_proj, flipped_teacher_tensor)

	mask = tf.cast(input_mask, tf.float32) # batch x seq

	joint_sample_mask = tf.eye(student_shape[0], dtype=bool)
	joint_sample_mask = tf.expand_dims(joint_sample_mask, axis=-1) # batch x batch x 1

	joint_masked_cpc_tensor = tf.cast(joint_sample_mask, tf.float32) * cpc_tensor
	marginal_masked_cpc_tensor = cpc_tensor

	# got each seq joint term
	joint_term = tf.reduce_sum(joint_masked_cpc_tensor, axis=[1]) # batch x seq

	marginal_term = tf.reduce_logsumexp(marginal_masked_cpc_tensor, axis=[1]) # batch x seq

	loss = -tf.reduce_sum((joint_term - marginal_term)*mask) / (1e-10 + tf.reduce_sum(mask))

	# wpc_grad = opt.compute_gradients(loss, [])
		
	# wpc_grad = tf.sqrt(tf.reduce_sum(tf.square(wpc_grad), axis=1))
	# wpc_grad_penality = tf.reduce_mean(tf.square(wpc_grad - 1.0) * 0.1)

	return loss 
Example #29
Source File: cpc_utils.py    From BERT with Apache License 2.0
def CPC_Hidden(student_tensor, teacher_tensor, input_mask):

	# input_mask: batch x seq

	teacher_shape = bert_utils.get_shape_list(teacher_tensor[0], expected_rank=[3])
	student_shape = bert_utils.get_shape_list(student_tensor[0], expected_rank=[3])

	with tf.variable_scope("cpc_weights", reuse=tf.AUTO_REUSE): 
		cpc_weights = tf.get_variable(
				"weights", [student_shape[-1],teacher_shape[-1]],
				initializer=create_initializer(0.02)
				)

	# batch x seq x t_hidden
	student_tensor_proj = tf.einsum("abc,cd->abd", student_tensor[-1], cpc_weights)
	# batch x seq x t_hidden and batch x seq x t_hidden
	# log exp(zt x W x ct)
	# batch x batch x seq
	cpc_tensor = tf.einsum("abd,cbd->acb", student_tensor_proj, teacher_tensor[-1])

	mask = tf.cast(input_mask, tf.float32) # batch x seq

	joint_sample_mask = tf.eye(student_shape[0], dtype=bool)
	joint_sample_mask = tf.expand_dims(joint_sample_mask, axis=-1) # batch x batch x 1

	joint_masked_cpc_tensor = tf.cast(joint_sample_mask, tf.float32) * cpc_tensor
	marginal_masked_cpc_tensor = cpc_tensor

	# got each seq joint term
	joint_term = tf.reduce_sum(joint_masked_cpc_tensor, axis=[1]) # batch x seq

	marginal_term = tf.reduce_logsumexp(marginal_masked_cpc_tensor, axis=[1]) # batch x seq

	# log_n = tf.math.log(tf.cast(cpc_tensor.shape[1], cpc_tensor.dtype))

	return -tf.reduce_sum((joint_term - marginal_term)*mask) / (1e-10 + tf.reduce_sum(mask)) 
Example #30
Source File: layers.py    From neuron with GNU General Public License v3.0
def _single_aff_to_shift(self, trf, volshape):
        if len(trf.shape) == 1:  # go from vector to matrix
            trf = tf.reshape(trf, [self.ndims, self.ndims + 1])

        # note: this adds unnecessary extra graph, since every batch entry builds its own tf.eye subgraph
        trf += tf.eye(self.ndims + 1)[:self.ndims, :]  # add identity, so the affine is a shift from the identity
        return affine_to_shift(trf, volshape, shift_center=True)