Python Examples of chainer.functions.matmul

Source File: state_q_functions.py From chainerrl with MIT License

6 votes

def __call__(self, state):
        h = self.hidden_layers(state)
        v = self.v(h)
        mu = self.mu(h)

        if self.scale_mu:
            mu = scale_by_tanh(mu, high=self.action_space.high,
                               low=self.action_space.low)

        mat_diag = F.exp(self.mat_diag(h))
        if hasattr(self, 'mat_non_diag'):
            mat_non_diag = self.mat_non_diag(h)
            tril = lower_triangular_matrix(mat_diag, mat_non_diag)
            mat = F.matmul(tril, tril, transb=True)
        else:
            mat = F.expand_dims(mat_diag ** 2, axis=2)
        return QuadraticActionValue(
            mu, mat, v, min_action=self.action_space.low,
            max_action=self.action_space.high)

Source File: convolution_rbm.py From SeRanet with MIT License

6 votes

def propup(self, vis):
        """
        This function propagates the visible units activation upwards to the hidden units
        Eq.(7)
        :param vis: Variable Matrix(batch_size, in_channels, image_height, image_width)
                    - given v_sample
        :return: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
                 - probability for each hidden units to be h_i=1
        """
        # conv.W: Matrix(out_channels, in_channels, filter height=ksize, filter width=ksize)
        # conv.b: Vec   (out_channels, )
        if self.real == 0:
            pre_sigmoid_activation = self.conv(vis)
        else:
            pre_sigmoid_activation = self.conv(vis / self.std_ch)
        # F.matmul(vis, self.conv.W, transb=True) + F.broadcast_to(self.conv.b, (vis.data.shape[0], self.n_hidden))
        return F.sigmoid(pre_sigmoid_activation)

Source File: convolution_rbm.py From SeRanet with MIT License

6 votes

def propdown(self, hid):
        """ This function propagates the hidden units activation downwords to the visible units
        :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
        :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible units to be v_j = 1
        """
        batch_size = hid.data.shape[0]
        if self.real == 0:
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
                # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
            v_mean = F.sigmoid(pre_sigmoid_activation)
            #print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
            #print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
            #print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
            #print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
            #print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
            #print('v_mean', v_mean.data.shape)
            #print('v_mean sum', F.sum(v_mean).data)
            #print('hid', hid.data.shape)

        else:
            # TODO: check
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        return v_mean

Source File: convolution_rbm.py From SeRanet with MIT License

6 votes

def reconstruct(self, v):
        """

        :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
        :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
        """
        batch_size = v.data.shape[0]
        xp = cuda.get_array_module(v.data)
        if self.real == 0:
            h = F.sigmoid(self.conv(v))
        else:
            std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
            h = F.sigmoid(self.conv(v / std_ch))
        # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
            # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
        return reconstructed_v

Source File: models.py From EEND with MIT License

6 votes

def dc_loss(embedding, label):
    """
    Deep clustering loss function.

    Args:
      embedding: (T,D)-shaped activation values
      label: (T,C)-shaped labels
    return:
      (1,)-shaped squared flobenius norm of the difference
      between embedding and label affinity matrices
    """
    xp = cuda.get_array_module(label)
    b = xp.zeros((label.shape[0], 2**label.shape[1]))
    b[np.arange(label.shape[0]),
      [int(''.join(str(x) for x in t), base=2) for t in label.data]] = 1

    label_f = chainer.Variable(b.astype(np.float32))
    loss = F.sum(F.square(F.matmul(embedding, embedding, True, False))) \
        + F.sum(F.square(F.matmul(label_f, label_f, True, False))) \
        - 2 * F.sum(F.square(F.matmul(embedding, label_f, True, False)))
    return loss

Source File: embed_mixture.py From lda2vec with MIT License

6 votes

def __call__(self, doc_ids, update_only_docs=False):
        """ Given an array of document integer indices, returns a vector
        for each document. The vector is composed of topic weights projected
        onto topic vectors.

        Args:
            doc_ids : chainer.Variable
                One-dimensional batch vectors of IDs

        Returns:
            doc_vector : chainer.Variable
                Batch of two-dimensional embeddings for every document.
        """
        # (batchsize, ) --> (batchsize, multinomial)
        proportions = self.proportions(doc_ids, softmax=True)
        # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
        factors = F.dropout(self.factors(), ratio=self.dropout_ratio)
        if update_only_docs:
            factors.unchain_backward()
        w_sum = F.matmul(proportions, factors)
        return w_sum

Source File: set2set.py From chainer-chemistry with MIT License

6 votes

def __call__(self, h):
        # type: (chainer.Variable) -> chainer.Variable
        xp = cuda.get_array_module(h)
        mb, node, ch = h.shape  # type: int, int, int
        if self.q_star is None:
            self.q_star = [
                xp.zeros((1, self.in_channels * 2)).astype('f')
                for _ in range(mb)
            ]
        self.hx, self.cx, q = self.lstm_layer(self.hx, self.cx, self.q_star)
        # self.hx: (mb, mb, ch)
        # self.cx: (mb, mb, ch)
        # q: List[(1, ch) * mb]
        q = functions.stack(q)  # q: (mb, 1, ch)
        q_ = functions.transpose(q, axes=(0, 2, 1))  # q_: (mb, ch, 1)
        e = functions.matmul(h, q_)  # e: (mb, node, 1)
        a = functions.softmax(e)  # a: (mb, node, 1)
        a = functions.broadcast_to(a, h.shape)  # a: (mb, node, ch)
        r = functions.sum((a * h), axis=1, keepdims=True)  # r: (mb, 1, ch)
        q_star_ = functions.concat((q, r), axis=2)  # q_star_: (mb, 1, ch*2)
        self.q_star = functions.separate(q_star_)
        return functions.reshape(q_star_, (mb, ch * 2))

Source File: state_q_functions.py From chainerrl with MIT License

6 votes

def __call__(self, state):
        h = state
        for layer in self.hidden_layers:
            h = F.relu(layer(h))
        v = self.v(h)
        mu = self.mu(h)

        if self.scale_mu:
            mu = scale_by_tanh(mu, high=self.action_space.high,
                               low=self.action_space.low)

        mat_diag = F.exp(self.mat_diag(h))
        if hasattr(self, 'mat_non_diag'):
            mat_non_diag = self.mat_non_diag(h)
            tril = lower_triangular_matrix(mat_diag, mat_non_diag)
            mat = F.matmul(tril, tril, transb=True)
        else:
            mat = F.expand_dims(mat_diag ** 2, axis=2)
        return QuadraticActionValue(
            mu, mat, v, min_action=self.action_space.low,
            max_action=self.action_space.high)

Source File: categorical_dqn.py From chainerrl with MIT License

6 votes

def compute_weighted_value_loss(eltwise_loss, batch_size, weights,
                                batch_accumulator='mean'):
    """Compute a loss for value prediction problem.

    Args:
        eltwise_loss (Variable): Element-wise loss per example per atom
        weights (ndarray): Weights for y, t.
        batch_accumulator (str): 'mean' will divide loss by batchsize
    Returns:
        (Variable) scalar loss
    """
    assert batch_accumulator in ('mean', 'sum')

    # eltwise_loss is (batchsize, n_atoms) array of losses
    # weights is an array of shape (batch_size)
    # sum loss across atoms and then apply weight per example in batch
    loss_sum = F.matmul(F.sum(eltwise_loss, axis=1), weights)
    if batch_accumulator == 'mean':
        loss = loss_sum / batch_size
    elif batch_accumulator == 'sum':
        loss = loss_sum
    return loss

Source File: iqn.py From chainerrl with MIT License

6 votes

def compute_weighted_value_loss(eltwise_loss, weights,
                                batch_accumulator='mean'):
    """Compute a loss for value prediction problem.

    Args:
        eltwise_loss (Variable): Element-wise loss per example
        weights (ndarray): Weights for y, t.
        batch_accumulator (str): 'mean' will divide loss by batchsize
    Returns:
        (Variable) scalar loss
    """
    batch_size = eltwise_loss.shape[0]
    assert batch_accumulator in ('mean', 'sum')
    assert eltwise_loss.ndim == 3
    # eltwise_loss is (batchsize, n , n') array of losses
    # weights is an array of shape (batch_size)
    # apply weights per example in batch
    loss_sum = F.matmul(F.sum(F.mean(eltwise_loss, axis=2), axis=1), weights)
    if batch_accumulator == 'mean':
        loss = loss_sum / batch_size
    elif batch_accumulator == 'sum':
        loss = loss_sum
    return loss

Source File: net.py From chainer-gan-lib with MIT License

5 votes

def backward_linear(x_in, x, l):
    y = F.matmul(x, l.W)
    return y

Source File: attention.py From espnet with Apache License 2.0

5 votes

def forward(self, e_var, s_var=None, mask=None, batch=1):
        """Core function of the Multi-head attention layer.

        Args:
            e_var (chainer.Variable): Variable of input array.
            s_var (chainer.Variable): Variable of source array from encoder.
            mask (chainer.Variable): Attention mask.
            batch (int): Batch size.

        Returns:
            chainer.Variable: Outout of multi-head attention layer.

        """
        xp = self.xp
        if s_var is None:
            # batch, head, time1/2, d_k)
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
        else:
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
        scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(
            self.d_k
        )
        if mask is not None:
            mask = xp.stack([mask] * self.h, axis=1)
            scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE, "f"))
        self.attn = F.softmax(scores, axis=-1)
        p_attn = F.dropout(self.attn, self.dropout)
        x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
        x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
        return self.linear_out(x)

Source File: attention.py From kiss with GNU General Public License v3.0

5 votes

def attention_implementation(self, query, key, value, mask=None, dropout_ratio=None):
        scores = F.matmul(query, F.transpose(key, (0, 1, 3, 2))) / math.sqrt(self.key_dimensionality)
        if mask is not None:
            batch_size, num_heads, _, _ = scores.shape
            mask = self.xp.array(mask)
            mask = self.xp.broadcast_to(mask, (batch_size, num_heads) + mask.shape[2:])
            mask = mask[:, :, :scores.shape[2], :scores.shape[3]]
            scores = F.where(mask, scores, self.xp.full_like(scores.array, -1e9))

        attention_probabilities = F.softmax(scores, axis=3)
        if dropout_ratio is not None:
            attention_probabilities = F.dropout(attention_probabilities, ratio=dropout_ratio)

        return F.matmul(attention_probabilities, value), attention_probabilities

Source File: look_at.py From neural_renderer with MIT License

5 votes

def look_at(vertices, eye, at=None, up=None):
    """
    "Look at" transformation of vertices.
    """
    assert (vertices.ndim == 3)

    xp = chainer.cuda.get_array_module(vertices)
    batch_size = vertices.shape[0]
    if at is None:
        at = xp.array([0, 0, 0], 'float32')
    if up is None:
        up = xp.array([0, 1, 0], 'float32')

    if isinstance(eye, list) or isinstance(eye, tuple):
        eye = xp.array(eye, 'float32')
    if eye.ndim == 1:
        eye = cf.tile(eye[None, :], (batch_size, 1))
    if at.ndim == 1:
        at = cf.tile(at[None, :], (batch_size, 1))
    if up.ndim == 1:
        up = cf.tile(up[None, :], (batch_size, 1))

    # create new axes
    z_axis = cf.normalize(at - eye)
    x_axis = cf.normalize(neural_renderer.cross(up, z_axis))
    y_axis = cf.normalize(neural_renderer.cross(z_axis, x_axis))

    # create rotation matrix: [bs, 3, 3]
    r = cf.concat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), axis=1)
    if r.shape[0] != vertices.shape[0]:
        r = cf.broadcast_to(r, (vertices.shape[0], 3, 3))

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = cf.broadcast_to(eye[:, None, :], vertices.shape)
    vertices = vertices - eye
    vertices = cf.matmul(vertices, r, transb=True)

    return vertices

Source File: dern.py From der-network with MIT License

5 votes

def attention_history(self, dL, cue, train=True):
        D = F.concat(dL, axis=0)
        D, Cue = F.broadcast(D, cue)
        S = self.m(F.tanh(self.W_dm(D) + Cue))
        S = F.softmax(F.reshape(S, (1, len(dL))))
        pre_v = F.matmul(S, D)
        return pre_v

Source File: dern.py From der-network with MIT License

5 votes

def predict_answer(self, u_Dq, v_eDq, e_occur_L, train=True):
        v_eDq2 = v_eDq + self.make_heuristic_vec(e_occur_L, train=train)
        score = F.matmul(u_Dq, v_eDq2, transb=True)
        return score

Source File: convolution_rbm.py From SeRanet with MIT License

5 votes

def free_energy(self, v):
        """
        :param Variable (batch_size, in_channels, image_height, image_width) - input data (training data)
        :return: scalar
        """
        batch_size = v.data.shape[0]
        in_channels = self.in_channels
        real = self.real
        if real == 0:
            '''
            visible layer is 0, 1 (bit)
            vbias_term = 1 * SUM(a(i) * v(i))
            '''
            v_sum = F.sum(v, axis=(2, 3))  # sum over image_height & image_width
            # Originally, it should return sum for each batch.
            # but it returns scalar, which is sum over batches, since sum is used at the end anyway.
            vbias_term = F.sum(F.matmul(v_sum, self.conv.a))
            wx_b = self.conv(v)

        else:
            '''
            visible layer takes real value
            vbias_term = 0.5 * SUM((v(i)-a(i)) * (v(i) - a(i)))
            '''
            #TODO: check
            #m = Variable(xp.ones((batch_size, 1), dtype=xp.float32))
            n = F.reshape(self.conv.a, (1, in_channels, 1, 1))
            xp = cuda.get_array_module(n.data)
            std_ch = xp.reshape(self.std, (1, in_channels, 1, 1))

            #v_ = v - F.matmul(m, n)
            v_ = (v - F.broadcast_to(n, v.data.shape)) / std_ch
            vbias_term = F.sum(0.5 * v_ * v_)
            wx_b = self.conv(v / std_ch)


        hidden_term = F.sum(F.log(1 + F.exp(wx_b)))
        # print('vbias = ', vbias_term.data, ', hidden = ', hidden_term.data, 'F.exp(wx_b) = ', F.exp(wx_b).data)
        return - vbias_term - hidden_term

Source File: mlp_encoder.py From models with MIT License

5 votes

def node2edge(self, x, rel_rec, rel_send):
        # NOTE: Assumes that we have the same graph across all samples.
        # x: [batch_size, num_nodes, feature_dim]
        # rel_rec, rel_send: [num_edges, num_nodes]
        receivers = F.matmul(rel_rec, x)
        senders = F.matmul(rel_send, x)
        # receivers, senders: [batch_size, num_edges, feature_dim]
        edges = F.concat([receivers, senders], axis=2)  # along num_edges
        return edges

Source File: look.py From neural_renderer with MIT License

5 votes

def look(vertices, eye, direction=None, up=None):
    """
    "Look at" transformation of vertices.
    """
    assert (vertices.ndim == 3)

    xp = chainer.cuda.get_array_module(vertices)
    if direction is None:
        direction = xp.array([0, 0, 1], 'float32')
    if up is None:
        up = xp.array([0, 1, 0], 'float32')

    if isinstance(eye, list) or isinstance(eye, tuple):
        eye = xp.array(eye, 'float32')
    if eye.ndim == 1:
        eye = eye[None, :]
    if direction.ndim == 1:
        direction = direction[None, :]
    if up.ndim == 1:
        up = up[None, :]

    # create new axes
    z_axis = cf.normalize(direction)
    x_axis = cf.normalize(neural_renderer.cross(up, z_axis))
    y_axis = cf.normalize(neural_renderer.cross(z_axis, x_axis))

    # create rotation matrix: [bs, 3, 3]
    r = cf.concat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), axis=1)
    if r.shape[0] != vertices.shape[0]:
        r = cf.broadcast_to(r, vertices.shape)

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = cf.broadcast_to(eye[:, None, :], vertices.shape)
    vertices = vertices - eye
    vertices = cf.matmul(vertices, r, transb=True)

    return vertices

Source File: transform_net.py From chainer-pointnet with MIT License

5 votes

def __call__(self, x):
        t = self.trans_module(x)
        # t: (minibatch, K, K)
        # x: (minibatch, K, N, 1)
        # h: (minibatch, K, N)
        # K = in_dim
        h = functions.matmul(t, x[:, :, :, 0])
        bs, k, n = h.shape
        h = functions.reshape(h, (bs, k, n, 1))
        return h, t

Source File: pointnet_seg.py From chainer-pointnet with MIT License

5 votes

def calc_trans_loss(t):
    # Loss to enforce the transformation as orthogonal matrix
    # t (batchsize, K, K) - transform matrix
    xp = cuda.get_array_module(t)
    bs, k1, k2 = t.shape
    assert k1 == k2
    mat_diff = functions.matmul(t, functions.transpose(t, (0, 2, 1)))
    mat_diff = mat_diff - xp.identity(k1, dtype=xp.float32)
    # divide by 2. is to make the behavior same with tf.
    # https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/nn/l2_loss
    return functions.sum(functions.batch_l2_norm_squared(mat_diff)) / 2.

Source File: pointnet_cls.py From chainer-pointnet with MIT License

5 votes

def calc_trans_loss(t):
    # Loss to enforce the transformation as orthogonal matrix
    # t (batchsize, K, K) - transform matrix
    xp = cuda.get_array_module(t)
    bs, k1, k2 = t.shape
    assert k1 == k2
    mat_diff = functions.matmul(t, functions.transpose(t, (0, 2, 1)))
    mat_diff = mat_diff - xp.identity(k1, dtype=xp.float32)
    # divide by 2. is to make the behavior same with tf.
    # https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/nn/l2_loss
    return functions.sum(functions.batch_l2_norm_squared(mat_diff)) / 2.

Source File: relgcn_update.py From chainer-chemistry with MIT License

5 votes

def __call__(self, h, adj, **kwargs):
        """main calculation

        Args:
            h: (batchsize, num_nodes, in_channels)
            adj: (batchsize, num_edge_type, num_nodes, num_nodes)

        Returns:
            (batchsize, num_nodes, ch)
        """
        mb, node, ch = h.shape

        # --- self connection, apply linear function ---
        hs = self.graph_linear_self(h)
        # --- relational feature, from neighbor connection ---
        # Expected number of neighbors of a vertex
        # Since you have to divide by it, if its 0, you need to
        # arbitrarily set it to 1
        m = self.graph_linear_edge(h)
        m = functions.reshape(
            m, (mb, node, self.out_channels, self.n_edge_types))
        m = functions.transpose(m, (0, 3, 1, 2))
        # m: (batchsize, edge_type, node, ch)
        # hrL (batchsize, edge_type, node, ch)
        hr = functions.matmul(adj, m)
        # hr: (batchsize, node, ch)
        hr = functions.sum(hr, axis=1)
        return hs + hr

Source File: relgcn.py From graph-nvp with MIT License

5 votes

def __call__(self, h, adj):
        """

        Args:
            h:
            adj:

        Returns:

        """

        mb, node, ch = h.shape

        # --- self connection, apply linear function ---
        hs = self.graph_linear_self(h)
        # --- relational feature, from neighbor connection ---
        # Expected number of neighbors of a vertex
        # Since you have to divide by it, if its 0, you need to arbitrarily set it to 1
        m = self.graph_linear_edge(h)
        m = F.reshape(m, (mb, node, self.out_ch, self.num_edge_type))
        m = F.transpose(m, (0, 3, 1, 2))
        # m: (batchsize, edge_type, node, ch)
        # hr: (batchsize, edge_type, node, ch)
        hr = F.matmul(adj, m)
        # hr: (batchsize, node, ch)
        hr = F.sum(hr, axis=1)
        return hs + hr

Source File: backwards.py From chainer-gan-experiments with MIT License

5 votes

def backward_linear(x_in, x, l):
    y = F.matmul(x, l.W)
    return y

Source File: nets.py From text-gcn-chainer with Creative Commons Zero v1.0 Universal

5 votes

def __call__(self, x, adj):
        if isinstance(x, chainer.utils.CooMatrix):
            x = F.sparse_matmul(x, self.W)
        else:
            x = F.matmul(x, self.W)
        output = F.sparse_matmul(adj, x)

        if self.b is not None:
            output += self.b

        return output

Source File: n_pair_mc_loss.py From deep_metric_learning with MIT License

5 votes

def n_pair_mc_loss(f, f_p, l2_reg):
    """Multi-class N-pair loss (N-pair-mc loss) function.

    Args:
        f (~chainer.Variable): Feature vectors.
            All examples must be different classes each other.
        f_p (~chainer.Variable): Positive examples corresponding to f.
            Each example must be the same class for each example in f.
        l2_reg (~float): A weight of L2 regularization for feature vectors.

    Returns:
        ~chainer.Variable: Loss value.

    See: `Improved Deep Metric Learning with Multi-class N-pair Loss \
        Objective <https://papers.nips.cc/paper/6200-improved-deep-metric-\
        learning-with-multi-class-n-pair-loss-objective>`_

    """
    logit = matmul(f, transpose(f_p))
    N = len(logit.data)
    xp = cuda.get_array_module(logit.data)
    loss_sce = softmax_cross_entropy(logit, xp.arange(N))
    l2_loss = sum(batch_l2_norm_squared(f) +
                  batch_l2_norm_squared(f_p)) / (2.0 * N)
    loss = loss_sce + l2_reg * l2_loss
    return loss

Source File: angular_loss.py From deep_metric_learning with MIT License

5 votes

def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.npdarray):
            Anchor vectors. Each vectors in f must be l2 normalized.
        f_p (chainer.Variable or xp.npdarray):
            Positive vectors. Each vectors in f must be l2 normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
#    term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))

Source File: chainer-gogh.py From chainer-gogh with MIT License

5 votes

def get_matrix(y):
    ch = y.data.shape[1]
    wd = y.data.shape[2]
    gogh_y = F.reshape(y, (ch,wd**2))
    gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)
    return gogh_matrix

Source File: test_matmul.py From chainer with MIT License

5 votes

def test_invalid_shape(self):
        x_data = numpy.zeros((2, 3, 4), dtype=numpy.float32)
        y_data = numpy.zeros((3, 4, 3), dtype=numpy.float32)
        x = chainer.Variable(x_data)
        y = chainer.Variable(y_data)

        with self.assertRaises(type_check.InvalidType):
            F.matmul(x, y)

Python chainer.functions.matmul() Examples