Python torch.matmul() Examples

The following are 30 code examples of torch.matmul(). The original project and source file for each example are noted above it. You may also want to check out all available functions and classes of the torch module.
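As a quick orientation, a minimal sketch of torch.matmul()'s shape semantics (all shapes are illustrative):

import torch

# 2-D inputs: ordinary matrix-matrix multiplication.
a = torch.randn(3, 4)
b = torch.randn(4, 5)
print(torch.matmul(a, b).shape)   # torch.Size([3, 5])

# Higher-dimensional inputs: the leading (batch) dimensions broadcast,
# and the last two dimensions are matrix-multiplied.
q = torch.randn(8, 2, 10, 64)
k = torch.randn(8, 2, 64, 10)
print(torch.matmul(q, k).shape)   # torch.Size([8, 2, 10, 10])

# A 1-D second argument is treated as a vector and the result drops a dim.
v = torch.randn(4)
print(torch.matmul(a, v).shape)   # torch.Size([3])

Most of the examples below are instances of these batched patterns, in particular attention scores (Q @ K^T) followed by weighted aggregation (softmax(scores) @ V).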
Example #1
Source File: train_eval.py    From IGMC with MIT License
def train(model, optimizer, loader, device, regression=False, ARR=0):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        data = data.to(device)
        out = model(data)
        if regression:
            loss = F.mse_loss(out, data.y.view(-1))
        else:
            loss = F.nll_loss(out, data.y.view(-1))
        if ARR != 0:
            for gconv in model.convs:
                w = torch.matmul(
                    gconv.att, 
                    gconv.basis.view(gconv.num_bases, -1)
                ).view(gconv.num_relations, gconv.in_channels, gconv.out_channels)
                reg_loss = torch.sum((w[1:, :, :] - w[:-1, :, :])**2)
                loss += ARR * reg_loss
        loss.backward()
        total_loss += loss.item() * num_graphs(data)
        optimizer.step()
        torch.cuda.empty_cache()
    return total_loss / len(loader.dataset) 
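The torch.matmul() call above reconstructs per-relation weight matrices from a shared basis (R-GCN-style basis decomposition), so that adjacent relation weights can then be regularized toward each other. A standalone sketch of just that step, with hypothetical sizes:

import torch

num_bases, num_relations = 4, 6
in_channels, out_channels = 32, 16

att = torch.randn(num_relations, num_bases)                # per-relation mixing coefficients
basis = torch.randn(num_bases, in_channels, out_channels)  # shared basis matrices

# (num_relations, num_bases) @ (num_bases, in*out) -> (num_relations, in*out)
w = torch.matmul(att, basis.view(num_bases, -1))
w = w.view(num_relations, in_channels, out_channels)
print(w.shape)  # torch.Size([6, 32, 16])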
Example #2
Source File: nonlocal_layer.py    From Attention-Gated-Networks with MIT License
def _embedded_gaussian(self, x):
        batch_size = x.size(0)

        # g=>(b, c, t, h, w)->(b, 0.5c, t, h, w)->(b, thw, 0.5c)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        # theta=>(b, c, t, h, w)[->(b, 0.5c, t, h, w)]->(b, thw, 0.5c)
        # phi  =>(b, c, t, h, w)[->(b, 0.5c, t, h, w)]->(b, 0.5c, thw)
        # f=>(b, thw, 0.5c)dot(b, 0.5c, thw) = (b, thw, thw)
        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
        f = torch.matmul(theta_x, phi_x)
        f_div_C = F.softmax(f, dim=-1)

        # (b, thw, thw)dot(b, thw, 0.5c) = (b, thw, 0.5c)->(b, 0.5c, t, h, w)->(b, c, t, h, w)
        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        z = W_y + x

        return z 
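The two torch.matmul() calls above are the heart of the embedded-Gaussian non-local block: the first forms pairwise similarities between all thw positions, the second aggregates the value features with the softmax-normalized weights. A shape-only sketch with the spatial dimensions already flattened:

import torch
import torch.nn.functional as F

b, half_c, thw = 2, 32, 4 * 14 * 14
theta = torch.randn(b, thw, half_c)
phi = torch.randn(b, half_c, thw)
g = torch.randn(b, thw, half_c)

f = torch.matmul(theta, phi)        # (b, thw, thw) pairwise similarities
f_div_C = F.softmax(f, dim=-1)      # normalize over the last thw axis
y = torch.matmul(f_div_C, g)        # (b, thw, half_c) aggregated features
print(y.shape)                      # torch.Size([2, 784, 32])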
Example #3
Source File: box_util.py    From H3DNet with MIT License
def get_3d_box_batch(box_size, heading_angle, center):
    ''' box_size: [x1,x2,...,xn,3]
        heading_angle: [x1,x2,...,xn]
        center: [x1,x2,...,xn,3]
    Return:
        [x1,x2,...,xn,8,3]
    '''
    input_shape = heading_angle.shape
    R = roty_batch(heading_angle)
    l = np.expand_dims(box_size[...,0], -1) # [x1,...,xn,1]
    w = np.expand_dims(box_size[...,1], -1)
    h = np.expand_dims(box_size[...,2], -1)
    corners_3d = np.zeros(tuple(list(input_shape)+[8,3]))
    corners_3d[...,:,0] = np.concatenate((l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2), -1)
    corners_3d[...,:,1] = np.concatenate((h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2), -1)
    corners_3d[...,:,2] = np.concatenate((w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2), -1)
    tlist = [i for i in range(len(input_shape))]
    tlist += [len(input_shape)+1, len(input_shape)]
    corners_3d = np.matmul(corners_3d, np.transpose(R, tuple(tlist)))
    corners_3d += np.expand_dims(center, -2)
    return corners_3d 
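get_3d_box_batch depends on roty_batch, which is not shown here. A usage sketch with a plausible minimal stand-in (batched rotation about the y-axis, the usual convention for heading angles):

import numpy as np

def roty_batch(t):
    # Hypothetical stand-in: one 3x3 y-axis rotation matrix per angle.
    c, s = np.cos(t), np.sin(t)
    R = np.zeros(t.shape + (3, 3))
    R[..., 0, 0] = c
    R[..., 0, 2] = s
    R[..., 1, 1] = 1
    R[..., 2, 0] = -s
    R[..., 2, 2] = c
    return R

box_size = np.abs(np.random.randn(5, 3)) + 1.0   # (n, 3) box dimensions
heading = np.random.uniform(0, 2 * np.pi, 5)     # (n,) heading angles
center = np.random.randn(5, 3)                   # (n, 3) box centers
corners = get_3d_box_batch(box_size, heading, center)
print(corners.shape)  # (5, 8, 3)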
Example #4
Source File: linear.py    From pyfilter with MIT License
def _kernel_2d(self, y, loc, h_var_inv, o_var_inv, c):
        tc = c if self._model.obs_ndim > 0 else c.unsqueeze(-2)

        # ===== Define covariance ===== #
        ttc = tc.transpose(-2, -1)
        diag_o_var_inv = construct_diag(o_var_inv if self._model.observable.ndim > 0 else o_var_inv.unsqueeze(-1))
        t2 = torch.matmul(ttc, torch.matmul(diag_o_var_inv, tc))

        cov = (construct_diag(h_var_inv) + t2).inverse()

        # ===== Get mean ===== #
        t1 = h_var_inv * loc

        t2 = torch.matmul(diag_o_var_inv, y if y.dim() > 0 else y.unsqueeze(-1))
        t3 = torch.matmul(ttc, t2.unsqueeze(-1))[..., 0]

        m = torch.matmul(cov, (t1 + t3).unsqueeze(-1))[..., 0]

        return MultivariateNormal(m, scale_tril=torch.cholesky(cov)) 
Example #5
Source File: data_cnn.py    From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License
def _rot(rot):
    cos_r, sin_r = rot.cos(), rot.sin()
    zeros = rot.new(rot.size()[:2] + (1,)).zero_()
    ones = rot.new(rot.size()[:2] + (1,)).fill_(1)

    r1 = torch.stack((ones, zeros, zeros), dim=-1)
    rx2 = torch.stack((zeros, cos_r[:, :, 0:1], sin_r[:, :, 0:1]), dim=-1)
    rx3 = torch.stack((zeros, -sin_r[:, :, 0:1], cos_r[:, :, 0:1]), dim=-1)
    rx = torch.cat((r1, rx2, rx3), dim=2)

    ry1 = torch.stack((cos_r[:, :, 1:2], zeros, -sin_r[:, :, 1:2]), dim=-1)
    r2 = torch.stack((zeros, ones, zeros), dim=-1)
    ry3 = torch.stack((sin_r[:, :, 1:2], zeros, cos_r[:, :, 1:2]), dim=-1)
    ry = torch.cat((ry1, r2, ry3), dim=2)

    rz1 = torch.stack((cos_r[:, :, 2:3], sin_r[:, :, 2:3], zeros), dim=-1)
    r3 = torch.stack((zeros, zeros, ones), dim=-1)
    rz2 = torch.stack((-sin_r[:, :, 2:3], cos_r[:, :, 2:3], zeros), dim=-1)
    rz = torch.cat((rz1, rz2, r3), dim=2)

    rot = rz.matmul(ry).matmul(rx)

    return rot 
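_rot composes per-axis rotations into one rotation matrix per frame via chained matmul (rz @ ry @ rx). A usage sketch, assuming Euler angles of shape (batch, frames, 3):

import torch

angles = torch.randn(2, 16, 3) * 0.1   # small x/y/z rotations per frame
R = _rot(angles)                       # (2, 16, 3, 3)
joints = torch.randn(2, 16, 3, 25)     # e.g. 25 skeleton joints per frame
rotated = torch.matmul(R, joints)      # rotates every joint in one call
print(R.shape, rotated.shape)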
Example #6
Source File: transform_cnn.py    From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License
def _transform(x, mat, maxmin):
    rot = mat[:,0:3]
    trans = mat[:,3:6]

    x = x.contiguous().view(-1, x.size()[1] , x.size()[2] * x.size()[3])

    max_val, min_val = maxmin[:,0], maxmin[:,1]
    max_val, min_val = max_val.contiguous().view(-1,1), min_val.contiguous().view(-1,1)
    max_val, min_val = max_val.repeat(1,3), min_val.repeat(1,3)
    trans, rot = _trans_rot(trans, rot)

    x1 = torch.matmul(rot,x)
    min_val1 = torch.cat((min_val, Variable(min_val.data.new(min_val.size()[0], 1).fill_(1))), dim=-1)
    min_val1 = min_val1.unsqueeze(-1)
    min_val1 = torch.matmul(trans, min_val1)

    min_val = (torch.matmul(rot, min_val1).squeeze(-1) - min_val) / (max_val - min_val)

    min_val = min_val.mul_(255)
    x = torch.add(x1, min_val.unsqueeze(-1))

    x = x.contiguous().view(-1,3, 224,224)

    return x 
Example #7
Source File: GST.py    From GST-Tacotron with MIT License
def forward(self, query, key):
        querys = self.W_query(query)  # [N, T_q, num_units]
        keys = self.W_key(key)  # [N, T_k, num_units]
        values = self.W_value(key)

        split_size = self.num_units // self.num_heads
        querys = torch.stack(torch.split(querys, split_size, dim=2), dim=0)  # [h, N, T_q, num_units/h]
        keys = torch.stack(torch.split(keys, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]
        values = torch.stack(torch.split(values, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]

        # score = softmax(QK^T / (d_k ** 0.5))
        scores = torch.matmul(querys, keys.transpose(2, 3))  # [h, N, T_q, T_k]
        scores = scores / (self.key_dim ** 0.5)
        scores = F.softmax(scores, dim=3)

        # out = score @ V (attention-weighted sum of values)
        out = torch.matmul(scores, values)  # [h, N, T_q, num_units/h]
        out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0)  # [N, T_q, num_units]

        return out 
Example #8
Source File: self_attention.py    From TVQAplus with MIT License
def attention(self, query, key, value, mask=None, dropout=None):
        """ Compute 'Scaled Dot Product Attention'
        Args:
            query: (N, nh, L, d_k)
            key: (N, nh, L, d_k)
            value: (N, nh, L, d_k)
            mask: (N, 1, L, 1)
            dropout:
        """
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)  # (N, nh, L, L)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        p_attn = torch.softmax(scores, dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)
        return torch.matmul(p_attn, value), p_attn  # (N, nh, L, d_k), (N, nh, L, L) 
Example #9
Source File: context_query_attention.py    From TVQAplus with MIT License
def similarity(self, C, Q, c_mask, q_mask):
        """
        word2word dot-product similarity
        Args:
            C: (N, 5, Li, Lqa, D)
            Q: (N, 1, Li, Lr, D)
            c_mask: (N, 5, Li, Lqa)
            q_mask: (N, 1, Li, Lr)
        Returns:
            masked_S, S_mask: (N, 5, Li, Lqa, Lr)
        """
        C = F.dropout(F.normalize(C, p=2, dim=-1), p=self.dropout, training=self.training)
        Q = F.dropout(F.normalize(Q, p=2, dim=-1), p=self.dropout, training=self.training)

        S_mask = torch.matmul(c_mask.unsqueeze(-1), q_mask.unsqueeze(-2))  # (N, 5, Li, Lqa, Lr)
        S = torch.matmul(C, Q.transpose(-2, -1))  # (N, 5, Li, Lqa, Lr)
        masked_S = S - 1e10*(1 - S_mask)  # (N, 5, Li, Lqa, Lr)
        return masked_S, S_mask 
Example #10
Source File: context_query_attention.py    From TVQAplus with MIT License
def similarity(self, C, Q, c_mask, q_mask):
        """
        word2word dot-product similarity
        :param C: (N, *, Lc, D)
        :param Q: (N, *, Lq, D)
        :param c_mask: (N, *, Lc)
        :param q_mask: (N, *, Lq)
        :return: (N, *, Lc, Lq)
        """
        C = F.dropout(C, p=0.1, training=self.training)
        Q = F.dropout(Q, p=0.1, training=self.training)
        hsz_root = math.sqrt(C.shape[-1])

        S_mask = torch.matmul(c_mask.unsqueeze(-1), q_mask.unsqueeze(-2))  # (N, *, Lc, Lq)
        S = torch.matmul(C, Q.transpose(-2, -1)) / hsz_root  # (N, *, Lc, Lq)
        masked_S = S - 1e10*(1 - S_mask)  # (N, *, Lc, Lq)
        return masked_S 
Example #11
Source File: nonlocal_layer.py    From Attention-Gated-Networks with MIT License
def _gaussian(self, x):
        batch_size = x.size(0)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        theta_x = x.view(batch_size, self.in_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)

        if self.sub_sample_factor > 1:
            phi_x = self.phi(x).view(batch_size, self.in_channels, -1)
        else:
            phi_x = x.view(batch_size, self.in_channels, -1)

        f = torch.matmul(theta_x, phi_x)
        f_div_C = F.softmax(f, dim=-1)

        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        z = W_y + x

        return z 
Example #12
Source File: nonlocal_layer.py    From Attention-Gated-Networks with MIT License
def _dot_product(self, x):
        batch_size = x.size(0)

        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
        f = torch.matmul(theta_x, phi_x)
        N = f.size(-1)
        f_div_C = f / N

        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        z = W_y + x

        return z 
Example #13
Source File: context_block.py    From mmdetection with Apache License 2.0
def spatial_pool(self, x):
        batch, channel, height, width = x.size()
        if self.pooling_type == 'att':
            input_x = x
            # [N, C, H * W]
            input_x = input_x.view(batch, channel, height * width)
            # [N, 1, C, H * W]
            input_x = input_x.unsqueeze(1)
            # [N, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N, 1, H * W]
            context_mask = context_mask.view(batch, 1, height * width)
            # [N, 1, H * W]
            context_mask = self.softmax(context_mask)
            # [N, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N, 1, C, 1]
            context = torch.matmul(input_x, context_mask)
            # [N, C, 1, 1]
            context = context.view(batch, channel, 1, 1)
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)

        return context 
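The 'att' branch condenses all H*W positions into a single context vector per channel with one batched matmul against softmax attention weights (GCNet-style global-context pooling). A shape-only sketch:

import torch

N, C, H, W = 2, 64, 7, 7
input_x = torch.randn(N, C, H * W).unsqueeze(1)            # [N, 1, C, H*W]
weights = torch.softmax(torch.randn(N, 1, H * W), dim=2)   # attention over positions
context = torch.matmul(input_x, weights.unsqueeze(-1))     # [N, 1, C, 1]
print(context.view(N, C, 1, 1).shape)                      # torch.Size([2, 64, 1, 1])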
Example #14
Source File: gpt.py    From comet-commonsense with Apache License 2.0
def _attn(self, q, k, v, sequence_mask):
        w = torch.matmul(q, k)
        if self.scale:
            w = w / math.sqrt(v.size(-1))

        b_subset = self.b[:, :, :w.size(-2), :w.size(-1)]

        if sequence_mask is not None:
            b_subset = b_subset * sequence_mask.view(
                sequence_mask.size(0), 1, -1)
            b_subset = b_subset.permute(1, 0, 2, 3)

        w = w * b_subset + -1e9 * (1 - b_subset)
        w = nn.Softmax(dim=-1)(w)
        w = self.attn_dropout(w)
        return torch.matmul(w, v) 
Example #15
Source File: tsd_net.py    From ConvLab with MIT License
def forward(self, z_enc_out, u_enc_out, u_input_np, m_t_input, degree_input, last_hidden, z_input_np):
        sparse_z_input = Variable(self.get_sparse_selective_input(z_input_np), requires_grad=False)

        m_embed = self.emb(m_t_input)
        z_context = self.attn_z(last_hidden, z_enc_out)
        u_context = self.attn_u(last_hidden, u_enc_out)
        gru_in = torch.cat([m_embed, u_context, z_context, degree_input.unsqueeze(0)], dim=2)
        gru_out, last_hidden = self.gru(gru_in, last_hidden)
        gen_score = self.proj(torch.cat([z_context, u_context, gru_out], 2)).squeeze(0)
        z_copy_score = F.tanh(self.proj_copy2(z_enc_out.transpose(0, 1)))
        z_copy_score = torch.matmul(z_copy_score, gru_out.squeeze(0).unsqueeze(2)).squeeze(2)
        z_copy_score = z_copy_score.cpu()
        z_copy_score_max = torch.max(z_copy_score, dim=1, keepdim=True)[0]
        z_copy_score = torch.exp(z_copy_score - z_copy_score_max)  # [B,T]
        z_copy_score = torch.log(torch.bmm(z_copy_score.unsqueeze(1), sparse_z_input)).squeeze(
            1) + z_copy_score_max  # [B,V]
        z_copy_score = cuda_(z_copy_score)

        scores = F.softmax(torch.cat([gen_score, z_copy_score], dim=1), dim=1)
        gen_score, z_copy_score = scores[:, :cfg.vocab_size], \
                                  scores[:, cfg.vocab_size:]
        proba = gen_score + z_copy_score[:, :cfg.vocab_size]  # [B,V]
        proba = torch.cat([proba, z_copy_score[:, cfg.vocab_size:]], 1)
        return proba, last_hidden, gru_out 
Example #16
Source File: context_block.py    From AerialDetection with Apache License 2.0
def spatial_pool(self, x):
        batch, channel, height, width = x.size()
        if self.pooling_type == 'att':
            input_x = x
            # [N, C, H * W]
            input_x = input_x.view(batch, channel, height * width)
            # [N, 1, C, H * W]
            input_x = input_x.unsqueeze(1)
            # [N, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N, 1, H * W]
            context_mask = context_mask.view(batch, 1, height * width)
            # [N, 1, H * W]
            context_mask = self.softmax(context_mask)
            # [N, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N, 1, C, 1]
            context = torch.matmul(input_x, context_mask)
            # [N, C, 1, 1]
            context = context.view(batch, channel, 1, 1)
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)

        return context 
Example #17
Source File: non_local.py    From AerialDetection with Apache License 2.0
def dot_product(self, theta_x, phi_x):
        # pairwise_weight: [N, HxW, HxW]
        pairwise_weight = torch.matmul(theta_x, phi_x)
        pairwise_weight /= pairwise_weight.shape[-1]
        return pairwise_weight 
Example #18
Source File: meta.py    From ScenarioMeta with MIT License
def forward(self, query_users, query_items, support_users, support_items):
        """
        :param query_users: (batch_size,)
        :param query_items: (batch_size,)
        :param support_users: (few_size,)
        :param support_items: (few_size,)
        :return: (batch_size,)
        """
        query_users, query_items = self.user_embeds(query_users), self.item_embeds(query_items)
        support_users, support_items = self.user_embeds(support_users), self.item_embeds(support_items)
        similarity = F.softmax(F.cosine_similarity(query_users.unsqueeze(-1), support_users.t().unsqueeze(0)), dim=1)
        item_embeds = torch.matmul(similarity, support_items)
        return F.cosine_similarity(query_items, item_embeds, dim=1) 
Example #19
Source File: annotated_attention.py    From Character-Level-Language-Modeling-with-Deeper-Self-Attention-pytorch with MIT License
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention'"""
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn 
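Since this attention is a free function, it is easy to exercise directly. A usage sketch with illustrative shapes (it assumes the snippet's own imports: torch, math, and torch.nn.functional as F):

import math
import torch
import torch.nn.functional as F

q = torch.randn(2, 4, 10, 64)        # (batch, heads, length, d_k)
k = torch.randn(2, 4, 10, 64)
v = torch.randn(2, 4, 10, 64)
mask = torch.ones(2, 1, 10, 10)      # 1 = attend, 0 = masked out
out, p_attn = attention(q, k, v, mask=mask)
print(out.shape, p_attn.shape)       # (2, 4, 10, 64) (2, 4, 10, 10)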
Example #20
Source File: nonlocal_layer.py    From Attention-Gated-Networks with MIT License
def _concatenation_proper(self, x):
        batch_size = x.size(0)

        # g=>(b, c, t, h, w)->(b, 0.5c, thw/s**2)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)

        # theta=>(b, c, t, h, w)[->(b, 0.5c, t, h, w)]->(b, 0.5c, thw)
        # phi  =>(b, c, t, h, w)[->(b, 0.5c, t, h, w)]->(b, 0.5c, thw/s**2)
        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)

        # theta => (b, 0.5c, thw) -> (expand) (b, 0.5c, thw/s**2, thw)
        # phi => (b, 0.5c, thw/s**2) ->  (expand) (b, 0.5c, thw/s**2, thw)
        # f=> RELU[(b, 0.5c, thw/s**2, thw) + (b, 0.5c, thw/s**2, thw)] = (b, 0.5c, thw/s**2, thw)
        f = theta_x.unsqueeze(dim=2).repeat(1,1,phi_x.size(2),1) + \
            phi_x.unsqueeze(dim=3).repeat(1,1,1,theta_x.size(2))
        f = F.relu(f, inplace=True)

        # psi -> W_psi^t * f -> (b, 1, thw/s**2, thw) -> (b, thw/s**2, thw)
        f = torch.squeeze(self.psi(f), dim=1)

        # Normalise the relations
        f_div_c = F.softmax(f, dim=1)

        # g(x_j) * f(x_j, x_i)
        # (b, 0.5c, thw/s**2) * (b, thw/s**2, thw) -> (b, 0.5c, thw)
        y = torch.matmul(g_x, f_div_c)
        y = y.contiguous().view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        z = W_y + x

        return z 
Example #21
Source File: nonlocal_layer.py    From Attention-Gated-Networks with MIT License
def _concatenation(self, x):
        batch_size = x.size(0)

        # g=>(b, c, t, h, w)->(b, 0.5c, thw/s**2)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)

        # theta=>(b, c, t, h, w)[->(b, 0.5c, t, h, w)]->(b, thw, 0.5c)
        # phi  =>(b, c, t, h, w)[->(b, 0.5c, t, h, w)]->(b, thw/s**2, 0.5c)
        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1).permute(0, 2, 1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1).permute(0, 2, 1)

        # theta => (b, thw, 0.5c) -> (b, thw, 1) -> (b, 1, thw) -> (expand) (b, thw/s**2, thw)
        # phi => (b, thw/s**2, 0.5c) -> (b, thw/s**2, 1) -> (expand) (b, thw/s**2, thw)
        # f=> RELU[(b, thw/s**2, thw) + (b, thw/s**2, thw)] = (b, thw/s**2, thw)
        f = self.wf_theta(theta_x).permute(0, 2, 1).repeat(1, phi_x.size(1), 1) + \
            self.wf_phi(phi_x).repeat(1, 1, theta_x.size(1))
        f = F.relu(f, inplace=True)

        # Normalise the relations
        N = f.size(-1)
        f_div_c = f / N

        # g(x_j) * f(x_j, x_i)
        # (b, 0.5c, thw/s**2) * (b, thw/s**2, thw) -> (b, 0.5c, thw)
        y = torch.matmul(g_x, f_div_c)
        y = y.contiguous().view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        z = W_y + x

        return z 
Example #22
Source File: semi_sup_net.py    From SEDST with MIT License
def forward(self, z_enc_out, pz_proba, u_enc_out, m_t_input, degree_input, last_hidden):
        """
        decode the response: P(m|u,z)
        :param degree_input: [B,D]
        :param pz_proba: [Tz,B,V], output of the prior decoder
        :param z_enc_out: [Tz,B,H]
        :param u_enc_out: [T,B,H]
        :param m_t_input: [1,B]
        :param last_hidden:
        :return: proba: [1,B,V]
        """
        m_embed = self.emb(m_t_input)
        pz_proba = shift(pz_proba)

        z_context = self.attn_z(last_hidden, z_enc_out)
        u_context = self.attn_u(last_hidden, u_enc_out)
        d_control = z_context + torch.mul(F.sigmoid(self.gate_z(z_context)), u_context)
        embed = torch.cat([d_control, m_embed, degree_input.unsqueeze(0)], dim=2)
        embed = self.dropout(embed)
        gru_out, last_hidden = self.gru(embed, last_hidden)
        gen_score = self.proj(torch.cat([z_context, u_context, gru_out], 2)).squeeze(0)
        z_copy_score = F.tanh(self.proj_copy1(z_enc_out.transpose(0, 1)))  # [B,T,H]
        if not cfg.force_stable:
            z_copy_score = torch.exp(torch.matmul(z_copy_score, gru_out.squeeze(0).unsqueeze(2)).squeeze(2))  # [B,T]
            z_copy_score = torch.log(torch.bmm(z_copy_score.unsqueeze(1), pz_proba.transpose(0, 1))).squeeze(1)  # [B,V]
        else:
            z_copy_score = torch.matmul(z_copy_score, gru_out.squeeze(0).unsqueeze(2)).squeeze(2)
            z_copy_score_max = torch.max(z_copy_score, dim=1, keepdim=True)[0]
            z_copy_score = torch.exp(z_copy_score - z_copy_score_max)
            z_copy_score = torch.log(torch.bmm(z_copy_score.unsqueeze(1), pz_proba.transpose(0, 1)))
            z_copy_score = z_copy_score.squeeze(1) + z_copy_score_max
        scores = F.softmax(torch.cat([gen_score, z_copy_score], dim=1), dim=1)
        gen_score, z_copy_score = tuple(torch.split(scores, gen_score.size(1), dim=1))
        proba = gen_score + z_copy_score  # [B,V]
        return proba, last_hidden, gru_out 
Example #23
Source File: transform_cnn.py    From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License
def _trans_rot(trans, rot):
    cos_r, sin_r = rot.cos(), rot.sin()
    zeros = Variable(rot.data.new(rot.size()[:1] + (1,)).zero_())
    ones = Variable(rot.data.new(rot.size()[:1] + (1,)).fill_(1))

    r1 = torch.stack((ones, zeros, zeros), dim=-1)
    rx2 = torch.stack((zeros, cos_r[:, 0:1], sin_r[:, 0:1]), dim=-1)
    rx3 = torch.stack((zeros, -sin_r[:, 0:1], cos_r[:, 0:1]), dim=-1)
    rx = torch.cat((r1, rx2, rx3), dim=1)

    ry1 = torch.stack((cos_r[:, 1:2], zeros, -sin_r[:, 1:2]), dim=-1)
    r2 = torch.stack((zeros, ones, zeros), dim=-1)
    ry3 = torch.stack((sin_r[:, 1:2], zeros, cos_r[:, 1:2]), dim=-1)
    ry = torch.cat((ry1, r2, ry3), dim=1)

    rz1 = torch.stack((cos_r[:, 2:3], sin_r[:, 2:3], zeros), dim=-1)
    r3 = torch.stack((zeros, zeros, ones), dim=-1)
    rz2 = torch.stack((-sin_r[:, 2:3], cos_r[:, 2:3], zeros), dim=-1)
    rz = torch.cat((rz1, rz2, r3), dim=1)

    rot = rz.matmul(ry).matmul(rx)

    rt1 = torch.stack((ones, zeros, zeros, trans[:, 0:1]), dim=-1)
    rt2 = torch.stack((zeros, ones, zeros, trans[:, 1:2]), dim=-1)
    rt3 = torch.stack((zeros, zeros, ones, trans[:, 2:3]), dim=-1)
    trans = torch.cat((rt1, rt2, rt3), dim=1)

    return trans, rot
Example #24
Source File: sir.py    From pyfilter with MIT License
def _helper(self, x, u):
        f_ = self.f(x, *self.theta_vals) * self._dt
        g = self.g(x, *self.theta_vals)

        return x + f_ + torch.matmul(g, u.unsqueeze(-1)).squeeze(-1) 
Example #25
Source File: linear.py    From pyfilter with MIT License
def f_2d(x, a, scale):
    return torch.matmul(a, x.unsqueeze(-1))[..., 0] 
Example #26
Source File: utils.py    From pyfilter with MIT License
def _construct_mvn(x: torch.Tensor, w: torch.Tensor):
    """
    Constructs a multivariate normal distribution of weighted samples.
    :param x: The samples
    :param w: The weights
    """

    mean = (x * w.unsqueeze(-1)).sum(0)
    centralized = x - mean
    cov = torch.matmul(w * centralized.t(), centralized)

    return MultivariateNormal(mean, scale_tril=torch.cholesky(cov)) 
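A usage sketch for _construct_mvn, assuming the weights are already normalized (note that on recent PyTorch, torch.cholesky has moved to torch.linalg.cholesky):

import torch
from torch.distributions import MultivariateNormal

x = torch.randn(500, 3)          # samples
w = torch.full((500,), 1 / 500)  # uniform, normalized weights
dist = _construct_mvn(x, w)
print(dist.mean, dist.covariance_matrix.shape)  # ~zeros, torch.Size([3, 3])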
Example #27
Source File: uft.py    From pyfilter with MIT License
def correct(self, y: torch.Tensor, uft_pred: UFTPredictionResult):
        """
        Constructs the mean and covariance given the current observation and previous state.
        :param y: The current observation
        :param uft_pred: The prediction result to correct
        :return: Self
        """

        # ===== Calculate mean and covariance ====== #
        correction = self.calc_mean_cov(uft_pred)
        xmean, xcov, ymean, ycov = correction.xm, correction.xc, correction.ym, correction.yc

        # ==== Calculate cross covariance ==== #
        if xmean.dim() > 1:
            tx = uft_pred.spx - xmean.unsqueeze(-2)
        else:
            tx = uft_pred.spx - xmean

        if ymean.dim() > 1:
            ty = uft_pred.spy - ymean.unsqueeze(-2)
        else:
            ty = uft_pred.spy - ymean

        xycov = _covcalc(tx, ty, self._wc)

        # ==== Calculate the gain ==== #
        gain = torch.matmul(xycov, ycov.inverse())

        # ===== Calculate true mean and covariance ==== #
        txmean = xmean + torch.matmul(gain, (y - ymean).unsqueeze(-1))[..., 0]

        temp = torch.matmul(ycov, gain.transpose(-1, -2))
        txcov = xcov - torch.matmul(gain, temp)

        return UFTCorrectionResult(txmean, txcov, ymean, ycov) 
Example #28
Source File: BeliefTrackerSlotQueryMultiSlot.py    From ConvLab with MIT License
def attention(self, q, k, v, d_k, mask=None, dropout=None):

        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)

        if mask is not None:
            mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask == 0, -1e9)
        scores = F.softmax(scores, dim=-1)

        if dropout is not None:
            scores = dropout(scores)

        self.scores = scores
        output = torch.matmul(scores, v)
        return output 
Example #29
Source File: transformer.py    From Doc2EDAG with MIT License
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention'"""
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn 
Example #30
Source File: model2.py    From controllable-text-attribute-transfer with Apache License 2.0
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention' """
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn