Python torch.nn.functional.softplus() Examples
The following are 30 code examples of torch.nn.functional.softplus(), collected from open-source projects. The original project and source file are listed above each example. You may also want to check out the other available functions and classes of the torch.nn.functional module.
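Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what F.softplus computes. softplus(x) = (1/beta) * log(1 + exp(beta * x)) is a smooth, strictly positive approximation of ReLU; beta and threshold are the standard keyword arguments of torch.nn.functional.softplus, with inputs where x * beta > threshold passed through linearly for numerical stability.

import torch
import torch.nn.functional as F

x = torch.linspace(-3, 3, 7)

# Default: softplus(x) = log(1 + exp(x)), a smooth, always-positive ReLU surrogate.
y = F.softplus(x)

# beta sharpens the curve toward ReLU; threshold switches to the identity for
# large beta * x to avoid overflow in exp().
y_sharp = F.softplus(x, beta=10, threshold=20)

# Sanity check against the closed form on moderate inputs.
assert torch.allclose(y, torch.log1p(torch.exp(x)))
print(y)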
Example #1
Source File: loss.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 7 votes |
def forward(self, x, target):
    similarity_matrix = x @ x.T  # need grad here
    label_matrix = target.unsqueeze(1) == target.unsqueeze(0)

    negative_matrix = label_matrix.logical_not()
    positive_matrix = label_matrix.fill_diagonal_(False)

    sp = torch.where(positive_matrix, similarity_matrix, torch.zeros_like(similarity_matrix))
    sn = torch.where(negative_matrix, similarity_matrix, torch.zeros_like(similarity_matrix))

    ap = torch.clamp_min(1 + self.m - sp.detach(), min=0.)
    an = torch.clamp_min(sn.detach() + self.m, min=0.)

    logit_p = -self.gamma * ap * (sp - self.dp)
    logit_n = self.gamma * an * (sn - self.dn)

    logit_p = torch.where(positive_matrix, logit_p, torch.zeros_like(logit_p))
    logit_n = torch.where(negative_matrix, logit_n, torch.zeros_like(logit_n))

    loss = F.softplus(torch.logsumexp(logit_p, dim=1) + torch.logsumexp(logit_n, dim=1)).mean()

    return loss
Example #2
Source File: DDPAE.py From DDPAE-video-prediction with MIT License | 6 votes |
def sample_content(self, content, sample):
    '''
    Pass into content_lstm to get a final content.
    '''
    content = content.view(-1, self.n_frames_input, self.total_components, self.content_latent_size)
    contents = []
    for i in range(self.total_components):
        z = content[:, :, i, :]
        z = self.content_lstm(z).unsqueeze(1)  # batch_size x 1 x (content_latent_size * 2)
        contents.append(z)
    content = torch.cat(contents, dim=1).view(-1, self.content_latent_size * 2)

    # Get mu and sigma, and sample.
    content_mu = content[:, :self.content_latent_size]
    content_sigma = F.softplus(content[:, self.content_latent_size:])
    content = self.pyro_sample('content', dist.Normal, content_mu, content_sigma, sample)
    return content
Example #3
Source File: grid_attention_layer.py From Attention-Gated-Networks with MIT License | 6 votes |
def _concatenation_debug(self, x, g):
    input_size = x.size()
    batch_size = input_size[0]
    assert batch_size == g.size(0)

    # theta => (b, c, t, h, w) -> (b, i_c, t, h, w) -> (b, i_c, thw)
    # phi   => (b, g_d) -> (b, i_c)
    theta_x = self.theta(x)
    theta_x_size = theta_x.size()

    # g (b, c, t', h', w') -> phi_g (b, i_c, t', h', w')
    # Relu(theta_x + phi_g + bias) -> f = (b, i_c, thw) -> (b, i_c, t/s1, h/s2, w/s3)
    phi_g = F.upsample(self.phi(g), size=theta_x_size[2:], mode=self.upsample_mode)
    f = F.softplus(theta_x + phi_g)

    # psi^T * f -> (b, psi_i_c, t/s1, h/s2, w/s3)
    sigm_psi_f = F.sigmoid(self.psi(f))

    # upsample the attentions and multiply
    sigm_psi_f = F.upsample(sigm_psi_f, size=input_size[2:], mode=self.upsample_mode)
    y = sigm_psi_f.expand_as(x) * x
    W_y = self.W(y)

    return W_y, sigm_psi_f
Example #4
Source File: uncertainty_estimation.py From PyTorch-BayesianCNN with MIT License | 6 votes |
def get_uncertainty_per_image(model, input_image, T=15, normalized=False):
    input_image = input_image.unsqueeze(0)
    input_images = input_image.repeat(T, 1, 1, 1)

    net_out, _ = model(input_images)
    pred = torch.mean(net_out, dim=0).cpu().detach().numpy()
    if normalized:
        prediction = F.softplus(net_out)
        p_hat = prediction / torch.sum(prediction, dim=1).unsqueeze(1)
    else:
        p_hat = F.softmax(net_out, dim=1)
    p_hat = p_hat.detach().cpu().numpy()
    p_bar = np.mean(p_hat, axis=0)

    temp = p_hat - np.expand_dims(p_bar, 0)
    epistemic = np.dot(temp.T, temp) / T
    epistemic = np.diag(epistemic)

    aleatoric = np.diag(p_bar) - (np.dot(p_hat.T, p_hat) / T)
    aleatoric = np.diag(aleatoric)

    return pred, epistemic, aleatoric
Example #5
Source File: nconv.py From nconv with GNU General Public License v3.0 | 6 votes |
def _pos(self, p):
    pos_fn = self.pos_fn.lower()
    if pos_fn == 'softmax':
        p_sz = p.size()
        p = p.view(p_sz[0], p_sz[1], -1)
        p = F.softmax(p, -1)
        return p.view(p_sz)
    elif pos_fn == 'exp':
        return torch.exp(p)
    elif pos_fn == 'softplus':
        return F.softplus(p, beta=10)
    elif pos_fn == 'sigmoid':
        return F.sigmoid(p)
    else:
        print('Undefined positive function!')
        return
Example #6
Source File: flows.py From torchkit with MIT License | 6 votes |
def forward(self, inputs):
    if len(inputs) == 2:
        input, logdet = inputs
    elif len(inputs) == 3:
        input, logdet, context = inputs
    else:
        raise Exception('inputs length not correct')

    output = F.sigmoid(input)
    logdet += sum_from_one(-F.softplus(input) - F.softplus(-input))

    if len(inputs) == 2:
        return output, logdet
    elif len(inputs) == 3:
        return output, logdet, context
    else:
        raise Exception('inputs length not correct')
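The log-determinant term here relies on the identity log sigmoid'(x) = -softplus(x) - softplus(-x), which follows from log sigmoid(x) = -softplus(-x) and log(1 - sigmoid(x)) = -softplus(x). A small standalone check of that identity (independent of the torchkit helpers such as sum_from_one), assuming only PyTorch:

import torch
import torch.nn.functional as F

x = torch.randn(5)
y = torch.sigmoid(x)

# Log-derivative expressed with softplus, as used in the flow above.
log_dydx_softplus = -F.softplus(x) - F.softplus(-x)

# Same quantity from the sigmoid derivative sigma'(x) = sigma(x) * (1 - sigma(x)).
log_dydx_direct = torch.log(y * (1 - y))

assert torch.allclose(log_dydx_softplus, log_dydx_direct, atol=1e-6)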
Example #7
Source File: flows.py From torchkit with MIT License | 6 votes |
def forward(self, inputs):
    x, logdet, context = inputs
    out, _ = self.mdl((x, context))
    if isinstance(self.mdl, iaf_modules.cMADE):
        mean = out[:, :, 0]
        lstd = out[:, :, 1]

        std = self.realify(lstd)
        if self.realify == nn_.softplus:
            x_ = mean + std * x
        elif self.realify == nn_.sigmoid:
            x_ = (-std + 1.0) * mean + std * x
        elif self.realify == nn_.sigmoid2:
            x_ = (-std + 2.0) * mean + std * x
        logdet_ = sum_from_one(torch.log(std)) + logdet
        return x_, logdet_, context
Example #8
Source File: flows.py From torchkit with MIT License | 6 votes |
def reset_parameters(self):
    if isinstance(self.mean, nn_.ResLinear):
        self.mean.dot_01.scale.data.uniform_(-0.001, 0.001)
        self.mean.dot_h1.scale.data.uniform_(-0.001, 0.001)
        self.mean.dot_01.bias.data.uniform_(-0.001, 0.001)
        self.mean.dot_h1.bias.data.uniform_(-0.001, 0.001)
        self.lstd.dot_01.scale.data.uniform_(-0.001, 0.001)
        self.lstd.dot_h1.scale.data.uniform_(-0.001, 0.001)
        if self.realify == nn_.softplus:
            inv = np.log(np.exp(1 - nn_.delta) - 1) * 0.5
            self.lstd.dot_01.bias.data.uniform_(inv - 0.001, inv + 0.001)
            self.lstd.dot_h1.bias.data.uniform_(inv - 0.001, inv + 0.001)
        else:
            self.lstd.dot_01.bias.data.uniform_(-0.001, 0.001)
            self.lstd.dot_h1.bias.data.uniform_(-0.001, 0.001)
    elif isinstance(self.mean, nn.Linear):
        self.mean.weight.data.uniform_(-0.001, 0.001)
        self.mean.bias.data.uniform_(-0.001, 0.001)
        self.lstd.weight.data.uniform_(-0.001, 0.001)
        if self.realify == nn_.softplus:
            inv = np.log(np.exp(1 - nn_.delta) - 1) * 0.5
            self.lstd.bias.data.uniform_(inv - 0.001, inv + 0.001)
        else:
            self.lstd.bias.data.uniform_(-0.001, 0.001)
Example #9
Source File: flows.py From torchkit with MIT License | 6 votes |
def __init__(self, dim, context_dim, oper=nn_.ResLinear, realify=nn_.softplus):
    super(LinearFlow, self).__init__()
    self.realify = realify

    self.dim = dim
    self.context_dim = context_dim

    if type(dim) is int:
        dim_ = dim
    else:
        dim_ = np.prod(dim)

    self.mean = oper(context_dim, dim_)
    self.lstd = oper(context_dim, dim_)

    self.reset_parameters()
Example #10
Source File: test_nn_activations.py From numpy-ml with GNU General Public License v3.0 | 6 votes |
def test_softplus_grad(N=50):
    from numpy_ml.neural_nets.activations import SoftPlus

    N = np.inf if N is None else N

    mine = SoftPlus()
    gold = torch_gradient_generator(F.softplus)

    i = 0
    while i < N:
        n_ex = np.random.randint(1, 100)
        n_dims = np.random.randint(1, 100)
        z = random_tensor((n_ex, n_dims), standardize=True)
        assert_almost_equal(mine.grad(z), gold(z))
        print("PASSED")
        i += 1
Example #11
Source File: test_nn.py From numpy-ml with GNU General Public License v3.0 | 6 votes |
def test_softplus_grad(N=15):
    from numpy_ml.neural_nets.activations import SoftPlus

    np.random.seed(12345)

    N = np.inf if N is None else N

    mine = SoftPlus()
    gold = torch_gradient_generator(F.softplus)

    i = 0
    while i < N:
        n_ex = np.random.randint(1, 100)
        n_dims = np.random.randint(1, 100)
        z = random_tensor((n_ex, n_dims), standardize=True)
        assert_almost_equal(mine.grad(z), gold(z))
        print("PASSED")
        i += 1


#######################################################################
#                                Layers                               #
#######################################################################
Example #12
Source File: test_nn.py From numpy-ml with GNU General Public License v3.0 | 6 votes |
def test_softplus_activation(N=15):
    from numpy_ml.neural_nets.activations import SoftPlus

    np.random.seed(12345)

    N = np.inf if N is None else N

    mine = SoftPlus()
    gold = lambda z: F.softplus(torch.FloatTensor(z)).numpy()

    i = 0
    while i < N:
        n_dims = np.random.randint(1, 100)
        z = random_stochastic_matrix(1, n_dims)
        assert_almost_equal(mine.fn(z), gold(z))
        print("PASSED")
        i += 1


#######################################################################
#                         Activation Gradients                        #
#######################################################################
Example #13
Source File: real_nvp.py From real-nvp with MIT License | 6 votes |
def _pre_process(self, x):
    """Dequantize the input image `x` and convert to logits.

    Args:
        x (torch.Tensor): Input image.

    Returns:
        y (torch.Tensor): Dequantized logits of `x`.

    See Also:
        - Dequantization: https://arxiv.org/abs/1511.01844, Section 3.1
        - Modeling logits: https://arxiv.org/abs/1605.08803, Section 4.1
    """
    y = (x * 255. + torch.rand_like(x)) / 256.
    y = (2 * y - 1) * self.data_constraint
    y = (y + 1) / 2
    y = y.log() - (1. - y).log()

    # Save log-determinant of Jacobian of initial transform
    ldj = F.softplus(y) + F.softplus(-y) \
        - F.softplus((1. - self.data_constraint).log() - self.data_constraint.log())
    sldj = ldj.view(ldj.size(0), -1).sum(-1)

    return y, sldj
Example #14
Source File: prob_utils.py From vaeac with MIT License | 6 votes |
def normal_parse_params(params, min_sigma=0):
    """
    Take a Tensor (e.g. a neural network output) and return a
    torch.distributions.Normal distribution.
    The Normal distribution is component-wise independent, and its
    dimensionality depends on the input shape. The first half of the
    channels is the mean of the distribution; the softplus of the second
    half is the std (sigma), so there are no restrictions on the input tensor.

    min_sigma is the minimal value of sigma, i.e. if the above softplus
    is less than min_sigma, sigma is clipped from below with value
    min_sigma. This regularization is required for numerical stability
    and may be considered a neural network architecture choice without
    any change to the probabilistic model.
    """
    n = params.shape[0]
    d = params.shape[1]
    mu = params[:, :d // 2]
    sigma_params = params[:, d // 2:]
    sigma = softplus(sigma_params)
    sigma = sigma.clamp(min=min_sigma)
    distr = Normal(mu, sigma)
    return distr
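A short usage sketch for the function above, assuming the imports from the vaeac source are in scope (from torch.nn.functional import softplus and from torch.distributions import Normal); the tensor shapes are illustrative only:

import torch

# A fake "network output" with 8 channels: the first 4 become the mean,
# the softplus of the last 4 becomes the (clipped) standard deviation.
params = torch.randn(16, 8)
distr = normal_parse_params(params, min_sigma=1e-3)

print(distr.loc.shape, distr.scale.shape)   # torch.Size([16, 4]) twice
sample = distr.rsample()                    # reparameterized sample, shape (16, 4)
log_prob = distr.log_prob(sample).sum(-1)   # per-example log-density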
Example #15
Source File: deep-energy-mnist.py From higher with Apache License 2.0 | 6 votes |
def forward(self, x, y):
    # First extract an embedding z from the visual input x.
    #
    # We use softplus activations so our model has
    # (generally) non-zero second-order derivatives.
    z = F.softplus(self.conv1(x))
    z = F.max_pool2d(z, 2, 2)
    z = F.softplus(self.conv2(z))
    z = F.max_pool2d(z, 2, 2)
    z = z.view(-1, 4*4*50)
    z = F.softplus(self.fc1(z))
    z = self.fc2(z)

    # Next combine that embedding with the proposed label y
    # and pass that through a single hidden-layer to predict
    # the energy function value.
    v = torch.cat((z, y), dim=1)
    v = F.softplus(self.fce1(v))
    E = self.fce2(v).squeeze()
    return E
Example #16
Source File: vpg.py From garage with MIT License | 6 votes |
def _compute_policy_entropy(self, obs):
    r"""Compute entropy value of probability distribution.

    Notes: P is the maximum path length (self.max_path_length)

    Args:
        obs (torch.Tensor): Observation from the environment
            with shape :math:`(N, P, O*)`.

    Returns:
        torch.Tensor: Calculated entropy values given observation
            with shape :math:`(N, P)`.

    """
    if self._stop_entropy_gradient:
        with torch.no_grad():
            policy_entropy = self.policy(obs)[0].entropy()
    else:
        policy_entropy = self.policy(obs)[0].entropy()

    # This prevents entropy from becoming negative for small policy std
    if self._use_softplus_entropy:
        policy_entropy = F.softplus(policy_entropy)

    return policy_entropy
Example #17
Source File: flows.py From torchkit with MIT License | 5 votes |
def __init__(self, num_ds_dim=4):
    super(SigmoidFlow, self).__init__()
    self.num_ds_dim = num_ds_dim

    self.act_a = lambda x: nn_.softplus(x)
    self.act_b = lambda x: x
    self.act_w = lambda x: nn_.softmax(x, dim=2)
Example #18
Source File: loss.py From RobGAN with MIT License | 5 votes |
def loss_KL_d(dis_fake, dis_real):
    L1 = torch.mean(F.softplus(-dis_real))
    L2 = torch.mean(F.softplus(dis_fake))
    return L1 + L2
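Written with softplus on raw logits, this is the standard (non-hinge) GAN discriminator loss: softplus(-r) = -log(sigmoid(r)) and softplus(f) = -log(1 - sigmoid(f)), so it matches binary cross-entropy with real/fake targets. A quick check sketch using the loss_KL_d defined above (tensor shapes are arbitrary):

import torch
import torch.nn.functional as F

dis_real = torch.randn(32, 1)   # discriminator logits on real samples
dis_fake = torch.randn(32, 1)   # discriminator logits on generated samples

loss_softplus = loss_KL_d(dis_fake, dis_real)

# Equivalent formulation via binary cross-entropy with logits.
loss_bce = (F.binary_cross_entropy_with_logits(dis_real, torch.ones_like(dis_real))
            + F.binary_cross_entropy_with_logits(dis_fake, torch.zeros_like(dis_fake)))

assert torch.allclose(loss_softplus, loss_bce, atol=1e-6)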
Example #19
Source File: train_distilled_image.py From dataset-distillation with MIT License | 5 votes |
def get_steps(self):
    data_label_iterable = (x for _ in range(self.state.distill_epochs)
                           for x in zip(self.data, self.labels))
    lrs = F.softplus(self.raw_distill_lrs).unbind()
    steps = []
    for (data, label), lr in zip(data_label_iterable, lrs):
        steps.append((data, label, lr))
    return steps
Example #20
Source File: flows.py From torchkit with MIT License | 5 votes |
def reset_parameters(self):
    self.mdl.hidden_to_output.cscale.weight.data.uniform_(-0.001, 0.001)
    self.mdl.hidden_to_output.cscale.bias.data.uniform_(0.0, 0.0)
    self.mdl.hidden_to_output.cbias.weight.data.uniform_(-0.001, 0.001)
    self.mdl.hidden_to_output.cbias.bias.data.uniform_(0.0, 0.0)
    if self.realify == nn_.softplus:
        inv = np.log(np.exp(1 - nn_.delta) - 1)
        self.mdl.hidden_to_output.cbias.bias.data[1::2].uniform_(inv, inv)
    elif self.realify == nn_.sigmoid:
        self.mdl.hidden_to_output.cbias.bias.data[1::2].uniform_(2.0, 2.0)
Example #21
Source File: kafnets.py From kernel-activation-functions with MIT License | 5 votes |
def softplus_kernel(self, input):
    return F.softplus(input.unsqueeze(self.unsqueeze_dim) - self.dict)
Example #22
Source File: GAN.py From MaximumMarginGANs with MIT License | 5 votes |
def forward(self, x):
    # return F.softplus(x, self.a, 20.)
    return self.a(x)
Example #23
Source File: pytorch_util.py From leap with MIT License | 5 votes |
def softplus(x):
    """
    PyTorch's softplus isn't (easily) serializable.
    """
    return F.softplus(x)
Example #24
Source File: static_head.py From pytorch-dnc with MIT License | 5 votes |
def forward(self, hidden_vb, memory_vb):
    # outputs for computing addressing for heads
    # NOTE: to be consistent w/ the dnc paper, we use
    # NOTE: sigmoid to constrain to [0, 1]
    # NOTE: oneplus to constrain to [1, +inf]
    self.key_vb = F.tanh(self.hid_2_key(hidden_vb)).view(-1, self.num_heads, self.mem_wid)
    # TODO: relu to bias the memory to store positive values ??? check again
    self.beta_vb = F.softplus(self.hid_2_beta(hidden_vb)).view(-1, self.num_heads, 1)    # beta >= 1: https://github.com/deepmind/dnc/issues/9
    self.gate_vb = F.sigmoid(self.hid_2_gate(hidden_vb)).view(-1, self.num_heads, 1)     # gate in (0, 1): interpolation gate, blend wl_{t-1} & wc
    self.shift_vb = F.softmax(self.hid_2_shift(hidden_vb).view(-1, self.num_heads, self.num_allowed_shifts).transpose(0, 2)).transpose(0, 2)  # shift: sums to 1
    self.gamma_vb = (1. + F.softplus(self.hid_2_gamma(hidden_vb))).view(-1, self.num_heads, 1)  # gamma >= 1: sharpen the final weights

    # now we compute the addressing mechanism
    self._content_focus(memory_vb)
    self._location_focus()
Example #25
Source File: context_conditioned_policy.py From garage with MIT License | 5 votes |
def infer_posterior(self, context):
    r"""Compute :math:`q(z \| c)` as a function of input context and sample new z.

    Args:
        context (torch.Tensor): Context values, with shape
            :math:`(X, N, C)`. X is the number of tasks. N is batch size.
            C is the combined size of observation, action, reward, and
            next observation if next observation is used in context.
            Otherwise, C is the combined size of observation, action,
            and reward.

    """
    params = self._context_encoder.forward(context)
    params = params.view(context.size(0), -1,
                         self._context_encoder.output_dim)
    # with probabilistic z, predict mean and variance of q(z | c)
    if self._use_information_bottleneck:
        mu = params[..., :self._latent_dim]
        sigma_squared = F.softplus(params[..., self._latent_dim:])
        z_params = [
            product_of_gaussians(m, s)
            for m, s in zip(torch.unbind(mu), torch.unbind(sigma_squared))
        ]
        self.z_means = torch.stack([p[0] for p in z_params])
        self.z_vars = torch.stack([p[1] for p in z_params])
    else:
        self.z_means = torch.mean(params, dim=1)
    self.sample_from_belief()

# pylint: disable=arguments-differ
Example #26
Source File: dynamic_head.py From pytorch-dnc with MIT License | 5 votes |
def forward(self, hidden_vb, memory_vb):
    # outputs for computing addressing for heads
    # NOTE: to be consistent w/ the dnc paper, we use
    # NOTE: sigmoid to constrain to [0, 1]
    # NOTE: oneplus to constrain to [1, +inf]
    self.key_vb = F.tanh(self.hid_2_key(hidden_vb)).view(-1, self.num_heads, self.mem_wid)
    # TODO: relu to bias the memory to store positive values ??? check again
    self.beta_vb = F.softplus(self.hid_2_beta(hidden_vb)).view(-1, self.num_heads, 1)    # beta >= 1: https://github.com/deepmind/dnc/issues/9

    # now we compute the addressing mechanism
    self._content_focus(memory_vb)
Example #27
Source File: activations_autofn.py From gen-efficientnet-pytorch with Apache License 2.0 | 5 votes |
def backward(ctx, grad_output):
    x = ctx.saved_tensors[0]
    x_sigmoid = torch.sigmoid(x)
    x_tanh_sp = F.softplus(x).tanh()
    return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))
Example #28
Source File: activations_autofn.py From gen-efficientnet-pytorch with Apache License 2.0 | 5 votes |
def forward(ctx, x):
    ctx.save_for_backward(x)
    y = x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
    return y
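Examples #27 and #28 are the backward and forward halves of a custom autograd implementation of the Mish activation, mish(x) = x * tanh(softplus(x)). A sketch of how the two pieces might be combined into a single torch.autograd.Function (the class name MishFunction is illustrative, not the name used in gen-efficientnet-pytorch):

import torch
import torch.nn.functional as F


class MishFunction(torch.autograd.Function):
    """Mish: x * tanh(softplus(x)), with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x.mul(torch.tanh(F.softplus(x)))

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        x_sigmoid = torch.sigmoid(x)
        x_tanh_sp = F.softplus(x).tanh()
        return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))


# Check the hand-written gradient against autograd's numerical gradient.
x = torch.randn(10, dtype=torch.double, requires_grad=True)
assert torch.autograd.gradcheck(MishFunction.apply, (x,))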
Example #29
Source File: activations_jit.py From gen-efficientnet-pytorch with Apache License 2.0 | 5 votes |
def mish_jit_bwd(x, grad_output):
    x_sigmoid = torch.sigmoid(x)
    x_tanh_sp = F.softplus(x).tanh()
    return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))
Example #30
Source File: loss.py From RobGAN with MIT License | 5 votes |
def loss_KL_g(dis_fake):
    return torch.mean(F.softplus(-dis_fake))

# Hinge loss