Python chainer.functions.matmul() Examples

The following are 30 code examples of chainer.functions.matmul(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chainer.functions , or try the search function .
Example #1
Source File: state_q_functions.py    From chainerrl with MIT License 6 votes vote down vote up
def __call__(self, state):
        h = self.hidden_layers(state)
        v = self.v(h)
        mu = self.mu(h)

        if self.scale_mu:
            mu = scale_by_tanh(mu, high=self.action_space.high,
                               low=self.action_space.low)

        mat_diag = F.exp(self.mat_diag(h))
        if hasattr(self, 'mat_non_diag'):
            mat_non_diag = self.mat_non_diag(h)
            tril = lower_triangular_matrix(mat_diag, mat_non_diag)
            mat = F.matmul(tril, tril, transb=True)
        else:
            mat = F.expand_dims(mat_diag ** 2, axis=2)
        return QuadraticActionValue(
            mu, mat, v, min_action=self.action_space.low,
            max_action=self.action_space.high) 
Example #2
Source File: convolution_rbm.py    From SeRanet with MIT License 6 votes vote down vote up
def propup(self, vis):
        """
        This function propagates the visible units activation upwards to the hidden units
        Eq.(7)
        :param vis: Variable Matrix(batch_size, in_channels, image_height, image_width)
                    - given v_sample
        :return: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
                 - probability for each hidden units to be h_i=1
        """
        # conv.W: Matrix(out_channels, in_channels, filter height=ksize, filter width=ksize)
        # conv.b: Vec   (out_channels, )
        if self.real == 0:
            pre_sigmoid_activation = self.conv(vis)
        else:
            pre_sigmoid_activation = self.conv(vis / self.std_ch)
        # F.matmul(vis, self.conv.W, transb=True) + F.broadcast_to(self.conv.b, (vis.data.shape[0], self.n_hidden))
        return F.sigmoid(pre_sigmoid_activation) 
Example #3
Source File: convolution_rbm.py    From SeRanet with MIT License 6 votes vote down vote up
def propdown(self, hid):
        """ This function propagates the hidden units activation downwords to the visible units
        :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
        :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible units to be v_j = 1
        """
        batch_size = hid.data.shape[0]
        if self.real == 0:
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
                # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
            v_mean = F.sigmoid(pre_sigmoid_activation)
            #print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
            #print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
            #print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
            #print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
            #print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
            #print('v_mean', v_mean.data.shape)
            #print('v_mean sum', F.sum(v_mean).data)
            #print('hid', hid.data.shape)

        else:
            # TODO: check
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        return v_mean 
Example #4
Source File: convolution_rbm.py    From SeRanet with MIT License 6 votes vote down vote up
def reconstruct(self, v):
        """

        :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
        :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
        """
        batch_size = v.data.shape[0]
        xp = cuda.get_array_module(v.data)
        if self.real == 0:
            h = F.sigmoid(self.conv(v))
        else:
            std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
            h = F.sigmoid(self.conv(v / std_ch))
        # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
            # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
        return reconstructed_v 
Example #5
Source File: models.py    From EEND with MIT License 6 votes vote down vote up
def dc_loss(embedding, label):
    """
    Deep clustering loss function.

    Args:
      embedding: (T,D)-shaped activation values
      label: (T,C)-shaped labels
    return:
      (1,)-shaped squared flobenius norm of the difference
      between embedding and label affinity matrices
    """
    xp = cuda.get_array_module(label)
    b = xp.zeros((label.shape[0], 2**label.shape[1]))
    b[np.arange(label.shape[0]),
      [int(''.join(str(x) for x in t), base=2) for t in label.data]] = 1

    label_f = chainer.Variable(b.astype(np.float32))
    loss = F.sum(F.square(F.matmul(embedding, embedding, True, False))) \
        + F.sum(F.square(F.matmul(label_f, label_f, True, False))) \
        - 2 * F.sum(F.square(F.matmul(embedding, label_f, True, False)))
    return loss 
Example #6
Source File: embed_mixture.py    From lda2vec with MIT License 6 votes vote down vote up
def __call__(self, doc_ids, update_only_docs=False):
        """ Given an array of document integer indices, returns a vector
        for each document. The vector is composed of topic weights projected
        onto topic vectors.

        Args:
            doc_ids : chainer.Variable
                One-dimensional batch vectors of IDs

        Returns:
            doc_vector : chainer.Variable
                Batch of two-dimensional embeddings for every document.
        """
        # (batchsize, ) --> (batchsize, multinomial)
        proportions = self.proportions(doc_ids, softmax=True)
        # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
        factors = F.dropout(self.factors(), ratio=self.dropout_ratio)
        if update_only_docs:
            factors.unchain_backward()
        w_sum = F.matmul(proportions, factors)
        return w_sum 
Example #7
Source File: set2set.py    From chainer-chemistry with MIT License 6 votes vote down vote up
def __call__(self, h):
        # type: (chainer.Variable) -> chainer.Variable
        xp = cuda.get_array_module(h)
        mb, node, ch = h.shape  # type: int, int, int
        if self.q_star is None:
            self.q_star = [
                xp.zeros((1, self.in_channels * 2)).astype('f')
                for _ in range(mb)
            ]
        self.hx, self.cx, q = self.lstm_layer(self.hx, self.cx, self.q_star)
        # self.hx: (mb, mb, ch)
        # self.cx: (mb, mb, ch)
        # q: List[(1, ch) * mb]
        q = functions.stack(q)  # q: (mb, 1, ch)
        q_ = functions.transpose(q, axes=(0, 2, 1))  # q_: (mb, ch, 1)
        e = functions.matmul(h, q_)  # e: (mb, node, 1)
        a = functions.softmax(e)  # a: (mb, node, 1)
        a = functions.broadcast_to(a, h.shape)  # a: (mb, node, ch)
        r = functions.sum((a * h), axis=1, keepdims=True)  # r: (mb, 1, ch)
        q_star_ = functions.concat((q, r), axis=2)  # q_star_: (mb, 1, ch*2)
        self.q_star = functions.separate(q_star_)
        return functions.reshape(q_star_, (mb, ch * 2)) 
Example #8
Source File: state_q_functions.py    From chainerrl with MIT License 6 votes vote down vote up
def __call__(self, state):
        h = state
        for layer in self.hidden_layers:
            h = F.relu(layer(h))
        v = self.v(h)
        mu = self.mu(h)

        if self.scale_mu:
            mu = scale_by_tanh(mu, high=self.action_space.high,
                               low=self.action_space.low)

        mat_diag = F.exp(self.mat_diag(h))
        if hasattr(self, 'mat_non_diag'):
            mat_non_diag = self.mat_non_diag(h)
            tril = lower_triangular_matrix(mat_diag, mat_non_diag)
            mat = F.matmul(tril, tril, transb=True)
        else:
            mat = F.expand_dims(mat_diag ** 2, axis=2)
        return QuadraticActionValue(
            mu, mat, v, min_action=self.action_space.low,
            max_action=self.action_space.high) 
Example #9
Source File: categorical_dqn.py    From chainerrl with MIT License 6 votes vote down vote up
def compute_weighted_value_loss(eltwise_loss, batch_size, weights,
                                batch_accumulator='mean'):
    """Compute a loss for value prediction problem.

    Args:
        eltwise_loss (Variable): Element-wise loss per example per atom
        weights (ndarray): Weights for y, t.
        batch_accumulator (str): 'mean' will divide loss by batchsize
    Returns:
        (Variable) scalar loss
    """
    assert batch_accumulator in ('mean', 'sum')

    # eltwise_loss is (batchsize, n_atoms) array of losses
    # weights is an array of shape (batch_size)
    # sum loss across atoms and then apply weight per example in batch
    loss_sum = F.matmul(F.sum(eltwise_loss, axis=1), weights)
    if batch_accumulator == 'mean':
        loss = loss_sum / batch_size
    elif batch_accumulator == 'sum':
        loss = loss_sum
    return loss 
Example #10
Source File: iqn.py    From chainerrl with MIT License 6 votes vote down vote up
def compute_weighted_value_loss(eltwise_loss, weights,
                                batch_accumulator='mean'):
    """Compute a loss for value prediction problem.

    Args:
        eltwise_loss (Variable): Element-wise loss per example
        weights (ndarray): Weights for y, t.
        batch_accumulator (str): 'mean' will divide loss by batchsize
    Returns:
        (Variable) scalar loss
    """
    batch_size = eltwise_loss.shape[0]
    assert batch_accumulator in ('mean', 'sum')
    assert eltwise_loss.ndim == 3
    # eltwise_loss is (batchsize, n , n') array of losses
    # weights is an array of shape (batch_size)
    # apply weights per example in batch
    loss_sum = F.matmul(F.sum(F.mean(eltwise_loss, axis=2), axis=1), weights)
    if batch_accumulator == 'mean':
        loss = loss_sum / batch_size
    elif batch_accumulator == 'sum':
        loss = loss_sum
    return loss 
Example #11
Source File: net.py    From chainer-gan-lib with MIT License 5 votes vote down vote up
def backward_linear(x_in, x, l):
    y = F.matmul(x, l.W)
    return y 
Example #12
Source File: attention.py    From espnet with Apache License 2.0 5 votes vote down vote up
def forward(self, e_var, s_var=None, mask=None, batch=1):
        """Core function of the Multi-head attention layer.

        Args:
            e_var (chainer.Variable): Variable of input array.
            s_var (chainer.Variable): Variable of source array from encoder.
            mask (chainer.Variable): Attention mask.
            batch (int): Batch size.

        Returns:
            chainer.Variable: Outout of multi-head attention layer.

        """
        xp = self.xp
        if s_var is None:
            # batch, head, time1/2, d_k)
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
        else:
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
        scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(
            self.d_k
        )
        if mask is not None:
            mask = xp.stack([mask] * self.h, axis=1)
            scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE, "f"))
        self.attn = F.softmax(scores, axis=-1)
        p_attn = F.dropout(self.attn, self.dropout)
        x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
        x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
        return self.linear_out(x) 
Example #13
Source File: attention.py    From kiss with GNU General Public License v3.0 5 votes vote down vote up
def attention_implementation(self, query, key, value, mask=None, dropout_ratio=None):
        scores = F.matmul(query, F.transpose(key, (0, 1, 3, 2))) / math.sqrt(self.key_dimensionality)
        if mask is not None:
            batch_size, num_heads, _, _ = scores.shape
            mask = self.xp.array(mask)
            mask = self.xp.broadcast_to(mask, (batch_size, num_heads) + mask.shape[2:])
            mask = mask[:, :, :scores.shape[2], :scores.shape[3]]
            scores = F.where(mask, scores, self.xp.full_like(scores.array, -1e9))

        attention_probabilities = F.softmax(scores, axis=3)
        if dropout_ratio is not None:
            attention_probabilities = F.dropout(attention_probabilities, ratio=dropout_ratio)

        return F.matmul(attention_probabilities, value), attention_probabilities 
Example #14
Source File: look_at.py    From neural_renderer with MIT License 5 votes vote down vote up
def look_at(vertices, eye, at=None, up=None):
    """
    "Look at" transformation of vertices.
    """
    assert (vertices.ndim == 3)

    xp = chainer.cuda.get_array_module(vertices)
    batch_size = vertices.shape[0]
    if at is None:
        at = xp.array([0, 0, 0], 'float32')
    if up is None:
        up = xp.array([0, 1, 0], 'float32')

    if isinstance(eye, list) or isinstance(eye, tuple):
        eye = xp.array(eye, 'float32')
    if eye.ndim == 1:
        eye = cf.tile(eye[None, :], (batch_size, 1))
    if at.ndim == 1:
        at = cf.tile(at[None, :], (batch_size, 1))
    if up.ndim == 1:
        up = cf.tile(up[None, :], (batch_size, 1))

    # create new axes
    z_axis = cf.normalize(at - eye)
    x_axis = cf.normalize(neural_renderer.cross(up, z_axis))
    y_axis = cf.normalize(neural_renderer.cross(z_axis, x_axis))

    # create rotation matrix: [bs, 3, 3]
    r = cf.concat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), axis=1)
    if r.shape[0] != vertices.shape[0]:
        r = cf.broadcast_to(r, (vertices.shape[0], 3, 3))

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = cf.broadcast_to(eye[:, None, :], vertices.shape)
    vertices = vertices - eye
    vertices = cf.matmul(vertices, r, transb=True)

    return vertices 
Example #15
Source File: dern.py    From der-network with MIT License 5 votes vote down vote up
def attention_history(self, dL, cue, train=True):
        D = F.concat(dL, axis=0)
        D, Cue = F.broadcast(D, cue)
        S = self.m(F.tanh(self.W_dm(D) + Cue))
        S = F.softmax(F.reshape(S, (1, len(dL))))
        pre_v = F.matmul(S, D)
        return pre_v 
Example #16
Source File: dern.py    From der-network with MIT License 5 votes vote down vote up
def predict_answer(self, u_Dq, v_eDq, e_occur_L, train=True):
        v_eDq2 = v_eDq + self.make_heuristic_vec(e_occur_L, train=train)
        score = F.matmul(u_Dq, v_eDq2, transb=True)
        return score 
Example #17
Source File: convolution_rbm.py    From SeRanet with MIT License 5 votes vote down vote up
def free_energy(self, v):
        """
        :param Variable (batch_size, in_channels, image_height, image_width) - input data (training data)
        :return: scalar
        """
        batch_size = v.data.shape[0]
        in_channels = self.in_channels
        real = self.real
        if real == 0:
            '''
            visible layer is 0, 1 (bit)
            vbias_term = 1 * SUM(a(i) * v(i))
            '''
            v_sum = F.sum(v, axis=(2, 3))  # sum over image_height & image_width
            # Originally, it should return sum for each batch.
            # but it returns scalar, which is sum over batches, since sum is used at the end anyway.
            vbias_term = F.sum(F.matmul(v_sum, self.conv.a))
            wx_b = self.conv(v)

        else:
            '''
            visible layer takes real value
            vbias_term = 0.5 * SUM((v(i)-a(i)) * (v(i) - a(i)))
            '''
            #TODO: check
            #m = Variable(xp.ones((batch_size, 1), dtype=xp.float32))
            n = F.reshape(self.conv.a, (1, in_channels, 1, 1))
            xp = cuda.get_array_module(n.data)
            std_ch = xp.reshape(self.std, (1, in_channels, 1, 1))

            #v_ = v - F.matmul(m, n)
            v_ = (v - F.broadcast_to(n, v.data.shape)) / std_ch
            vbias_term = F.sum(0.5 * v_ * v_)
            wx_b = self.conv(v / std_ch)


        hidden_term = F.sum(F.log(1 + F.exp(wx_b)))
        # print('vbias = ', vbias_term.data, ', hidden = ', hidden_term.data, 'F.exp(wx_b) = ', F.exp(wx_b).data)
        return - vbias_term - hidden_term 
Example #18
Source File: mlp_encoder.py    From models with MIT License 5 votes vote down vote up
def node2edge(self, x, rel_rec, rel_send):
        # NOTE: Assumes that we have the same graph across all samples.
        # x: [batch_size, num_nodes, feature_dim]
        # rel_rec, rel_send: [num_edges, num_nodes]
        receivers = F.matmul(rel_rec, x)
        senders = F.matmul(rel_send, x)
        # receivers, senders: [batch_size, num_edges, feature_dim]
        edges = F.concat([receivers, senders], axis=2)  # along num_edges
        return edges 
Example #19
Source File: look.py    From neural_renderer with MIT License 5 votes vote down vote up
def look(vertices, eye, direction=None, up=None):
    """
    "Look at" transformation of vertices.
    """
    assert (vertices.ndim == 3)

    xp = chainer.cuda.get_array_module(vertices)
    if direction is None:
        direction = xp.array([0, 0, 1], 'float32')
    if up is None:
        up = xp.array([0, 1, 0], 'float32')

    if isinstance(eye, list) or isinstance(eye, tuple):
        eye = xp.array(eye, 'float32')
    if eye.ndim == 1:
        eye = eye[None, :]
    if direction.ndim == 1:
        direction = direction[None, :]
    if up.ndim == 1:
        up = up[None, :]

    # create new axes
    z_axis = cf.normalize(direction)
    x_axis = cf.normalize(neural_renderer.cross(up, z_axis))
    y_axis = cf.normalize(neural_renderer.cross(z_axis, x_axis))

    # create rotation matrix: [bs, 3, 3]
    r = cf.concat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), axis=1)
    if r.shape[0] != vertices.shape[0]:
        r = cf.broadcast_to(r, vertices.shape)

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = cf.broadcast_to(eye[:, None, :], vertices.shape)
    vertices = vertices - eye
    vertices = cf.matmul(vertices, r, transb=True)

    return vertices 
Example #20
Source File: transform_net.py    From chainer-pointnet with MIT License 5 votes vote down vote up
def __call__(self, x):
        t = self.trans_module(x)
        # t: (minibatch, K, K)
        # x: (minibatch, K, N, 1)
        # h: (minibatch, K, N)
        # K = in_dim
        h = functions.matmul(t, x[:, :, :, 0])
        bs, k, n = h.shape
        h = functions.reshape(h, (bs, k, n, 1))
        return h, t 
Example #21
Source File: pointnet_seg.py    From chainer-pointnet with MIT License 5 votes vote down vote up
def calc_trans_loss(t):
    # Loss to enforce the transformation as orthogonal matrix
    # t (batchsize, K, K) - transform matrix
    xp = cuda.get_array_module(t)
    bs, k1, k2 = t.shape
    assert k1 == k2
    mat_diff = functions.matmul(t, functions.transpose(t, (0, 2, 1)))
    mat_diff = mat_diff - xp.identity(k1, dtype=xp.float32)
    # divide by 2. is to make the behavior same with tf.
    # https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/nn/l2_loss
    return functions.sum(functions.batch_l2_norm_squared(mat_diff)) / 2. 
Example #22
Source File: pointnet_cls.py    From chainer-pointnet with MIT License 5 votes vote down vote up
def calc_trans_loss(t):
    # Loss to enforce the transformation as orthogonal matrix
    # t (batchsize, K, K) - transform matrix
    xp = cuda.get_array_module(t)
    bs, k1, k2 = t.shape
    assert k1 == k2
    mat_diff = functions.matmul(t, functions.transpose(t, (0, 2, 1)))
    mat_diff = mat_diff - xp.identity(k1, dtype=xp.float32)
    # divide by 2. is to make the behavior same with tf.
    # https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/nn/l2_loss
    return functions.sum(functions.batch_l2_norm_squared(mat_diff)) / 2. 
Example #23
Source File: relgcn_update.py    From chainer-chemistry with MIT License 5 votes vote down vote up
def __call__(self, h, adj, **kwargs):
        """main calculation

        Args:
            h: (batchsize, num_nodes, in_channels)
            adj: (batchsize, num_edge_type, num_nodes, num_nodes)

        Returns:
            (batchsize, num_nodes, ch)
        """
        mb, node, ch = h.shape

        # --- self connection, apply linear function ---
        hs = self.graph_linear_self(h)
        # --- relational feature, from neighbor connection ---
        # Expected number of neighbors of a vertex
        # Since you have to divide by it, if its 0, you need to
        # arbitrarily set it to 1
        m = self.graph_linear_edge(h)
        m = functions.reshape(
            m, (mb, node, self.out_channels, self.n_edge_types))
        m = functions.transpose(m, (0, 3, 1, 2))
        # m: (batchsize, edge_type, node, ch)
        # hrL (batchsize, edge_type, node, ch)
        hr = functions.matmul(adj, m)
        # hr: (batchsize, node, ch)
        hr = functions.sum(hr, axis=1)
        return hs + hr 
Example #24
Source File: relgcn.py    From graph-nvp with MIT License 5 votes vote down vote up
def __call__(self, h, adj):
        """

        Args:
            h:
            adj:

        Returns:

        """

        mb, node, ch = h.shape

        # --- self connection, apply linear function ---
        hs = self.graph_linear_self(h)
        # --- relational feature, from neighbor connection ---
        # Expected number of neighbors of a vertex
        # Since you have to divide by it, if its 0, you need to arbitrarily set it to 1
        m = self.graph_linear_edge(h)
        m = F.reshape(m, (mb, node, self.out_ch, self.num_edge_type))
        m = F.transpose(m, (0, 3, 1, 2))
        # m: (batchsize, edge_type, node, ch)
        # hr: (batchsize, edge_type, node, ch)
        hr = F.matmul(adj, m)
        # hr: (batchsize, node, ch)
        hr = F.sum(hr, axis=1)
        return hs + hr 
Example #25
Source File: backwards.py    From chainer-gan-experiments with MIT License 5 votes vote down vote up
def backward_linear(x_in, x, l):
    y = F.matmul(x, l.W)
    return y 
Example #26
Source File: nets.py    From text-gcn-chainer with Creative Commons Zero v1.0 Universal 5 votes vote down vote up
def __call__(self, x, adj):
        if isinstance(x, chainer.utils.CooMatrix):
            x = F.sparse_matmul(x, self.W)
        else:
            x = F.matmul(x, self.W)
        output = F.sparse_matmul(adj, x)

        if self.b is not None:
            output += self.b

        return output 
Example #27
Source File: n_pair_mc_loss.py    From deep_metric_learning with MIT License 5 votes vote down vote up
def n_pair_mc_loss(f, f_p, l2_reg):
    """Multi-class N-pair loss (N-pair-mc loss) function.

    Args:
        f (~chainer.Variable): Feature vectors.
            All examples must be different classes each other.
        f_p (~chainer.Variable): Positive examples corresponding to f.
            Each example must be the same class for each example in f.
        l2_reg (~float): A weight of L2 regularization for feature vectors.

    Returns:
        ~chainer.Variable: Loss value.

    See: `Improved Deep Metric Learning with Multi-class N-pair Loss \
        Objective <https://papers.nips.cc/paper/6200-improved-deep-metric-\
        learning-with-multi-class-n-pair-loss-objective>`_

    """
    logit = matmul(f, transpose(f_p))
    N = len(logit.data)
    xp = cuda.get_array_module(logit.data)
    loss_sce = softmax_cross_entropy(logit, xp.arange(N))
    l2_loss = sum(batch_l2_norm_squared(f) +
                  batch_l2_norm_squared(f_p)) / (2.0 * N)
    loss = loss_sce + l2_reg * l2_loss
    return loss 
Example #28
Source File: angular_loss.py    From deep_metric_learning with MIT License 5 votes vote down vote up
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.npdarray):
            Anchor vectors. Each vectors in f must be l2 normalized.
        f_p (chainer.Variable or xp.npdarray):
            Positive vectors. Each vectors in f must be l2 normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
#    term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1)) 
Example #29
Source File: chainer-gogh.py    From chainer-gogh with MIT License 5 votes vote down vote up
def get_matrix(y):
    ch = y.data.shape[1]
    wd = y.data.shape[2]
    gogh_y = F.reshape(y, (ch,wd**2))
    gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)
    return gogh_matrix 
Example #30
Source File: test_matmul.py    From chainer with MIT License 5 votes vote down vote up
def test_invalid_shape(self):
        x_data = numpy.zeros((2, 3, 4), dtype=numpy.float32)
        y_data = numpy.zeros((3, 4, 3), dtype=numpy.float32)
        x = chainer.Variable(x_data)
        y = chainer.Variable(y_data)

        with self.assertRaises(type_check.InvalidType):
            F.matmul(x, y)