Python torch.nn.Softplus() Examples

The following are 30 code examples of torch.nn.Softplus(), collected from open-source projects. The source file and originating project are noted above each example. You may also want to check out the other functions and classes available in the torch.nn module.
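As a quick reference, nn.Softplus applies the elementwise function Softplus(x) = (1/beta) * log(1 + exp(beta * x)), a smooth approximation of ReLU whose output is always strictly positive. The short sketch below is a minimal illustration written for this page (it is not taken from any of the projects listed), showing the module on its own and the common pattern, seen in several examples below, of constraining a predicted quantity such as a variance to be positive.

import torch
import torch.nn as nn

# Softplus(x) = (1/beta) * log(1 + exp(beta * x)); for numerical stability,
# PyTorch switches to the identity once beta * x exceeds `threshold`.
softplus = nn.Softplus(beta=1.0, threshold=20.0)

x = torch.tensor([-2.0, 0.0, 2.0])
print(softplus(x))  # tensor([0.1269, 0.6931, 2.1269]) -- smooth and strictly positive

# Common pattern in the examples below: force a network head to output a
# positive value (e.g. a variance) by appending Softplus to a linear layer.
var_head = nn.Sequential(nn.Linear(16, 1), nn.Softplus())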
Example #1
Source File: etm.py    From ETM with MIT License
def get_activation(self, act):
        if act == 'tanh':
            act = nn.Tanh()
        elif act == 'relu':
            act = nn.ReLU()
        elif act == 'softplus':
            act = nn.Softplus()
        elif act == 'rrelu':
            act = nn.RReLU()
        elif act == 'leakyrelu':
            act = nn.LeakyReLU()
        elif act == 'elu':
            act = nn.ELU()
        elif act == 'selu':
            act = nn.SELU()
        elif act == 'glu':
            act = nn.GLU()
        else:
            print('Defaulting to tanh activations...')
            act = nn.Tanh()
        return act 
Example #2
Source File: supervised_topic_model.py    From causal-text-embeddings with MIT License
def get_activation(self, act):
        if act == 'tanh':
            act = nn.Tanh()
        elif act == 'relu':
            act = nn.ReLU()
        elif act == 'softplus':
            act = nn.Softplus()
        elif act == 'rrelu':
            act = nn.RReLU()
        elif act == 'leakyrelu':
            act = nn.LeakyReLU()
        elif act == 'elu':
            act = nn.ELU()
        elif act == 'selu':
            act = nn.SELU()
        elif act == 'glu':
            act = nn.GLU()
        else:
            print('Defaulting to tanh activations...')
            act = nn.Tanh()
        return act 
Example #3
Source File: model.py    From cgcnn with MIT License
def __init__(self, atom_fea_len, nbr_fea_len):
        """
        Initialize ConvLayer.

        Parameters
        ----------

        atom_fea_len: int
          Number of atom hidden features.
        nbr_fea_len: int
          Number of bond features.
        """
        super(ConvLayer, self).__init__()
        self.atom_fea_len = atom_fea_len
        self.nbr_fea_len = nbr_fea_len
        self.fc_full = nn.Linear(2*self.atom_fea_len+self.nbr_fea_len,
                                 2*self.atom_fea_len)
        self.sigmoid = nn.Sigmoid()
        self.softplus1 = nn.Softplus()
        self.bn1 = nn.BatchNorm1d(2*self.atom_fea_len)
        self.bn2 = nn.BatchNorm1d(self.atom_fea_len)
        self.softplus2 = nn.Softplus() 
Example #4
Source File: codec.py    From pde-surrogate with MIT License
def activation(name):
    if name in ['tanh', 'Tanh']:
        return nn.Tanh()
    elif name in ['relu', 'ReLU']:
        return nn.ReLU(inplace=True)
    elif name in ['lrelu', 'LReLU']:
        return nn.LeakyReLU(inplace=True)
    elif name in ['sigmoid', 'Sigmoid']:
        return nn.Sigmoid()
    elif name in ['softplus', 'Softplus']:
        return nn.Softplus(beta=4)
    else:
        raise ValueError('Unknown activation function')


# modify the decoder network
# use upsampling instead of transconv
# it seems transposed convolution is much faster than nearest upsampling (or other interpolation)
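The comment above weighs a transposed convolution against nearest-neighbor (or other) upsampling followed by a regular convolution. The sketch below is a hypothetical side-by-side of the two upsampling blocks, written for this page rather than taken from pde-surrogate; the channel counts are placeholders.

import torch.nn as nn

# Option A: a transposed convolution learns the 2x upsampling in a single op.
up_transconv = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)

# Option B: fixed nearest-neighbor upsampling followed by a standard convolution;
# often used to reduce checkerboard artifacts, at the cost of an extra op.
up_nearest = nn.Sequential(
    nn.Upsample(scale_factor=2, mode='nearest'),
    nn.Conv2d(64, 32, kernel_size=3, padding=1),
)

Both blocks map an (N, 64, H, W) input to (N, 32, 2H, 2W); the project's comment notes that the transposed-convolution path was observed to be faster.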
Example #5
Source File: models.py    From spatial-VAE with MIT License
def __init__(self, n, latent_dim, hidden_dim, n_out=1, num_layers=1, activation=nn.Tanh
                , softplus=False, resid=False):
        super(VanillaGenerator, self).__init__()
        """
        The standard MLP structure for image generation. Decodes each pixel location as a function of z.
        """

        self.n_out = n_out
        self.softplus = softplus

        layers = [nn.Linear(latent_dim,hidden_dim), 
                  activation()]
        for _ in range(1,num_layers):
            if resid:
                layers.append(ResidLinear(hidden_dim, hidden_dim, activation=activation))
            else:
                layers.append(nn.Linear(hidden_dim,hidden_dim))
                layers.append(activation())
        layers.append(nn.Linear(hidden_dim, n*n_out))
        if softplus:
            layers.append(nn.Softplus())

        self.layers = nn.Sequential(*layers) 
Example #6
Source File: RMeN_v1.py    From R-MeN with Apache License 2.0
def __init__(self, config):
        super(RMeN, self).__init__(config)

        self.ent_embeddings = nn.Embedding(self.config.entTotal, self.config.hidden_size)  # vectorized quaternion
        self.rel_embeddings = nn.Embedding(self.config.relTotal, self.config.hidden_size)

        self.pos_h = nn.Parameter(nn.init.xavier_uniform_(torch.Tensor(1, self.config.hidden_size)))
        self.pos_r = nn.Parameter(nn.init.xavier_uniform_(torch.Tensor(1, self.config.hidden_size)))
        self.pos_t = nn.Parameter(nn.init.xavier_uniform_(torch.Tensor(1, self.config.hidden_size)))

        self.transformer_rel_rnn = RelationalMemory(
                        mem_slots=self.config.mem_slots, head_size=self.config.head_size,
                        num_heads=self.config.num_heads, input_size=self.config.hidden_size,
                        gate_style=self.config.gate_style, attention_mlp_layers=self.config.attention_mlp_layers,
                        return_all_outputs=True
                        ).to(device)
        self.model_memory = self.transformer_rel_rnn.initial_state(self.config.batch_seq_size).to(device)

        self.dropout = nn.Dropout(self.config.convkb_drop_prob)
        self.fc_layer = nn.Linear(self.transformer_rel_rnn.mem_size, 1)

        self.criterion = nn.Softplus()
        self.init_parameters() 
Example #7
Source File: ThreeConvThreeFC.py    From PyTorch-BayesianCNN with MIT License
def __init__(self, outputs, inputs):
        super(ThreeConvThreeFC, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(inputs, 32, 5, stride=1, padding=2),
            nn.Softplus(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(32, 64, 5, stride=1, padding=2),
            nn.Softplus(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 128, 5, stride=1, padding=1),
            nn.Softplus(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            FlattenLayer(2 * 2 * 128),
            nn.Linear(2 * 2 * 128, 1000),
            nn.Softplus(),
            nn.Linear(1000, 1000),
            nn.Softplus(),
            nn.Linear(1000, outputs)
        ) 
Example #8
Source File: KgCVAE.py    From MultiTurnDialogZoo with MIT License
def __init__(self, context_hidden, encoder_hidden, z_hidden):
        super(VariableLayer, self).__init__()
        self.context_hidden = context_hidden
        self.encoder_hidden = encoder_hidden
        self.z_hidden = z_hidden
        self.prior_h = nn.ModuleList([nn.Linear(context_hidden, context_hidden),
                                      nn.Linear(context_hidden, context_hidden)])
        self.prior_mu = nn.Linear(context_hidden, z_hidden)
        self.prior_var = nn.Linear(context_hidden, z_hidden)

        self.posterior_h = nn.ModuleList([nn.Linear(context_hidden+encoder_hidden,
                                                    context_hidden), 
                                          nn.Linear(context_hidden, 
                                                    context_hidden)])
        self.posterior_mu = nn.Linear(context_hidden, z_hidden)
        self.posterior_var = nn.Linear(context_hidden, z_hidden)
        self.softplus = nn.Softplus() 
Example #9
Source File: VHRED.py    From MultiTurnDialogZoo with MIT License
def __init__(self, context_hidden, encoder_hidden, z_hidden):
        super(VariableLayer, self).__init__()
        self.context_hidden = context_hidden
        self.encoder_hidden = encoder_hidden
        self.z_hidden = z_hidden
        self.prior_h = nn.ModuleList([nn.Linear(context_hidden, context_hidden),
                                      nn.Linear(context_hidden, context_hidden)])
        self.prior_mu = nn.Linear(context_hidden, z_hidden)
        self.prior_var = nn.Linear(context_hidden, z_hidden)

        self.posterior_h = nn.ModuleList([nn.Linear(context_hidden+encoder_hidden,
                                                    context_hidden), 
                                          nn.Linear(context_hidden, 
                                                    context_hidden)])
        self.posterior_mu = nn.Linear(context_hidden, z_hidden)
        self.posterior_var = nn.Linear(context_hidden, z_hidden)
        self.softplus = nn.Softplus() 
Example #10
Source File: layers.py    From Alchemy with MIT License
def __init__(self, rbf_dim, dim=64, update_edge=True):
        """
        Args:
            rbf_dim: the dimension of the RBF layer
            dim: the dimension of linear layers
            update_edge: whether to update the edge embedding in each conv layer
        """
        super().__init__()
        self._rbf_dim = rbf_dim
        self._dim = dim
        self._update_edge = update_edge

        self.linear_layer1 = nn.Linear(self._rbf_dim, self._dim)
        self.linear_layer2 = nn.Linear(self._dim, self._dim)
        self.linear_layer3 = nn.Linear(self._dim, self._dim)

        self.activation = nn.Softplus(beta=0.5, threshold=14) 
Example #11
Source File: layers.py    From Alchemy with MIT License
def __init__(self, rbf_dim, dim=64, act="sp"):
        """
        Args:
            rbf_dim: the dimension of the RBF layer
            dim: the dimension of linear layers
            act: activation function (default shifted softplus)
        """
        super().__init__()
        self._rbf_dim = rbf_dim
        self._dim = dim

        self.linear_layer1 = nn.Linear(self._rbf_dim, self._dim)
        self.linear_layer2 = nn.Linear(self._dim, self._dim)

        if act == "sp":
            self.activation = nn.Softplus(beta=0.5, threshold=14)
        else:
            self.activation = act 
Example #12
Source File: flows.py    From sylvester-flows with MIT License
def __init__(self):

        super(Planar, self).__init__()

        self.h = nn.Tanh()
        self.softplus = nn.Softplus() 
Example #13
Source File: flows.py    From UMNN with BSD 3-Clause "New" or "Revised" License
def __init__(self):

        super(Planar, self).__init__()

        self.h = nn.Tanh()
        self.softplus = nn.Softplus() 
Example #14
Source File: VAE.py    From UMNN with BSD 3-Clause "New" or "Revised" License
def create_encoder(self):
        """
        Helper function to create the elemental blocks for the encoder. Creates a gated convnet encoder.
        The encoder expects data as input of shape (batch_size, num_channels, width, height).
        """

        if self.input_type == 'binary':
            q_z_nn = nn.Sequential(
                GatedConv2d(self.input_size[0], 32, 5, 1, 2),
                GatedConv2d(32, 32, 5, 2, 2),
                GatedConv2d(32, 64, 5, 1, 2),
                GatedConv2d(64, 64, 5, 2, 2),
                GatedConv2d(64, 64, 5, 1, 2),
                GatedConv2d(64, 256, self.last_kernel_size, 1, 0),
            )
            q_z_mean = nn.Linear(256, self.z_size)
            q_z_var = nn.Sequential(
                nn.Linear(256, self.z_size),
                nn.Softplus(),
            )
            return q_z_nn, q_z_mean, q_z_var

        elif self.input_type == 'multinomial':
            act = None

            q_z_nn = nn.Sequential(
                GatedConv2d(self.input_size[0], 32, 5, 1, 2, activation=act),
                GatedConv2d(32, 32, 5, 2, 2, activation=act),
                GatedConv2d(32, 64, 5, 1, 2, activation=act),
                GatedConv2d(64, 64, 5, 2, 2, activation=act),
                GatedConv2d(64, 64, 5, 1, 2, activation=act),
                GatedConv2d(64, 256, self.last_kernel_size, 1, 0, activation=act)
            )
            q_z_mean = nn.Linear(256, self.z_size)
            q_z_var = nn.Sequential(nn.Linear(256, self.z_size), nn.Softplus(), nn.Hardtanh(min_val=0.01, max_val=7.))
            return q_z_nn, q_z_mean, q_z_var 
Example #15
Source File: common.py    From 3D_Appearance_SR with MIT License
def act_vconv(res_act):
    res_act = res_act.lower()
    if res_act == 'softplus':
        act = nn.Softplus()
    elif res_act == 'sigmoid':
        act = nn.Sigmoid()
    elif res_act == 'tanh':
        act = nn.Tanh()
    elif res_act == 'elu':
        act = nn.ELU()
    else:
        raise NotImplementedError
    return act 
Example #16
Source File: BayesianLayers.py    From Tutorial_BayesianCompressionForDL with MIT License
def __init__(self, in_features, out_features, cuda=False, init_weight=None, init_bias=None, clip_var=None):

        super(LinearGroupNJ, self).__init__()
        self.cuda = cuda
        self.in_features = in_features
        self.out_features = out_features
        self.clip_var = clip_var
        self.deterministic = False  # flag is used for compressed inference
        # trainable params according to Eq.(6)
        # dropout params
        self.z_mu = Parameter(torch.Tensor(in_features))
        self.z_logvar = Parameter(torch.Tensor(in_features))  # = z_mu^2 * alpha
        # weight params
        self.weight_mu = Parameter(torch.Tensor(out_features, in_features))
        self.weight_logvar = Parameter(torch.Tensor(out_features, in_features))

        self.bias_mu = Parameter(torch.Tensor(out_features))
        self.bias_logvar = Parameter(torch.Tensor(out_features))

        # init params either random or with pretrained net
        self.reset_parameters(init_weight, init_bias)

        # activations for kl
        self.sigmoid = nn.Sigmoid()
        self.softplus = nn.Softplus()

        # numerical stability param
        self.epsilon = 1e-8 
Example #17
Source File: layers.py    From Alchemy with MIT License
def __init__(self, rbf_dim, dim):
        super().__init__()

        self._atom_dim = dim

        self.activation = nn.Softplus(beta=0.5, threshold=14)

        self.node_layer1 = nn.Linear(dim, dim, bias=True)
        self.edge_layer1 = nn.Linear(dim, dim, bias=True)
        self.conv_layer = VEConv(rbf_dim, dim)
        self.node_layer2 = nn.Linear(dim, dim)
        self.node_layer3 = nn.Linear(dim, dim) 
Example #18
Source File: dense_ed.py    From cnn-surrogate with MIT License
def activation(name, *args):
    if name in ['tanh', 'Tanh']:
        return nn.Tanh()
    elif name in ['relu', 'ReLU']:
        return nn.ReLU(inplace=True)
    elif name in ['lrelu', 'LReLU']:
        return nn.LeakyReLU(inplace=True)
    elif name in ['sigmoid', 'Sigmoid']:
        return nn.Sigmoid()
    elif name in ['softplus', 'Softplus']:
        return nn.Softplus(beta=4)
    else:
        raise ValueError('Unknown activation function') 
Example #19
Source File: a3c_mlp_con.py    From pytorch-rl with MIT License
def __init__(self, args):
        super(A3CMlpConModel, self).__init__(args)
        # build model
        # 0. feature layers
        self.fc1 = nn.Linear(self.input_dims[0] * self.input_dims[1], self.hidden_dim) # NOTE: for pkg="gym"
        self.rl1 = nn.ReLU()
        self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.rl2 = nn.ReLU()
        self.fc3 = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.rl3 = nn.ReLU()
        self.fc4 = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.rl4 = nn.ReLU()

        self.fc1_v = nn.Linear(self.input_dims[0] * self.input_dims[1], self.hidden_dim) # NOTE: for pkg="gym"
        self.rl1_v = nn.ReLU()
        self.fc2_v = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.rl2_v = nn.ReLU()
        self.fc3_v = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.rl3_v = nn.ReLU()
        self.fc4_v = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.rl4_v = nn.ReLU()

        # lstm
        if self.enable_lstm:
            self.lstm  = nn.LSTMCell(self.hidden_dim, self.hidden_dim)
            self.lstm_v  = nn.LSTMCell(self.hidden_dim, self.hidden_dim)

        # 1. policy output
        self.policy_5   = nn.Linear(self.hidden_dim, self.output_dims)
        self.policy_sig = nn.Linear(self.hidden_dim, self.output_dims)
        self.softplus   = nn.Softplus()
        # 2. value output
        self.value_5    = nn.Linear(self.hidden_dim, 1)

        self._reset() 
Example #20
Source File: DeepBelief_playground.py    From Brancher with MIT License
def __init__(self, latent_size1, latent_size2, hidden_size=50):
        super(EncoderArchitecture2, self).__init__()
        self.l1 = nn.Linear(latent_size2, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size1)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size1)  # Latent log sd output
        self.softplus = nn.Softplus() 
Example #21
Source File: DeepBelief_playground.py    From Brancher with MIT License
def __init__(self, image_size, latent_size2, hidden_size=100):
        super(EncoderArchitecture1, self).__init__()
        self.l1 = nn.Linear(image_size, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size2)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size2)  # Latent log sd output
        self.softplus = nn.Softplus() 
Example #22
Source File: discrete_VAE_playground.py    From Brancher with MIT License
def __init__(self, image_size, latent_size, hidden_size1=512, hidden_size2=256):
        super(EncoderArchitecture, self).__init__()
        self.l1 = nn.Linear(image_size, hidden_size2)
        self.l2 = nn.Linear(hidden_size2, hidden_size1)
        self.f1 = nn.ReLU()
        self.f2 = nn.ReLU()
        self.l3 = nn.Linear(hidden_size1, latent_size)  # Latent mean output
        self.l4 = nn.Linear(hidden_size1, latent_size)  # Latent log sd output
        self.softplus = nn.Softplus() 
Example #23
Source File: DeeperBelief_playground_withLabels.py    From Brancher with MIT License
def __init__(self, latent_size1, latent_size2, hidden_size=70, noise_inpt_size=None):
        super(EncoderArchitecture3, self).__init__()
        self.l1 = nn.Linear(latent_size2, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size1)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size1)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #24
Source File: DeeperBelief_playground_withLabels.py    From Brancher with MIT License
def __init__(self, latent_size2, latent_size3, hidden_size=70, noise_inpt_size=None):
        super(EncoderArchitecture2, self).__init__()
        self.l1 = nn.Linear(latent_size3, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size2)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size2)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #25
Source File: DeepBeliefNetwork_KMNISTexperiment.py    From Brancher with MIT License
def __init__(self, latent_size1, latent_size2, hidden_size=70, noise_inpt_size=None):
        super(EncoderArchitecture3, self).__init__()
        self.l1 = nn.Linear(latent_size2, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size1)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size1)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #26
Source File: DeepBeliefNetwork_KMNISTexperiment.py    From Brancher with MIT License
def __init__(self, latent_size2, latent_size3, hidden_size=70, noise_inpt_size=None):
        super(EncoderArchitecture2, self).__init__()
        self.l1 = nn.Linear(latent_size3, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size2)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size2)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #27
Source File: DeepBeliefNetwork_KMNISTexperiment.py    From Brancher with MIT License
def __init__(self, image_size, latent_size3, hidden_size=120, noise_inpt_size=None):
        super(EncoderArchitecture1, self).__init__()
        self.l1 = nn.Linear(image_size, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size3)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size3)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #28
Source File: DeepBeliefNetworkLabels_KMNISTexperiment.py    From Brancher with MIT License
def __init__(self, latent_size1, latent_size2, hidden_size=70, noise_inpt_size=None):
        super(EncoderArchitecture3, self).__init__()
        self.l1 = nn.Linear(latent_size2, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size1)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size1)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #29
Source File: DeepBeliefNetworkLabels_KMNISTexperiment.py    From Brancher with MIT License
def __init__(self, latent_size2, latent_size3, hidden_size=70, noise_inpt_size=None):
        super(EncoderArchitecture2, self).__init__()
        self.l1 = nn.Linear(latent_size3, hidden_size)
        self.f1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, latent_size2)  # Latent mean output
        self.l3 = nn.Linear(hidden_size, latent_size2)  # Latent log sd output
        self.softplus = nn.Softplus()
        if noise_inpt_size:
            self.ln = nn.Linear(noise_inpt_size, hidden_size)
        else:
            self.ln = None 
Example #30
Source File: ConvKB.py    From ConvKB with Apache License 2.0
def __init__(self, config):
        super(ConvKB, self).__init__(config)

        self.ent_embeddings = nn.Embedding(self.config.entTotal, self.config.hidden_size) 
        self.rel_embeddings = nn.Embedding(self.config.relTotal, self.config.hidden_size)

        self.conv1_bn = nn.BatchNorm2d(1)
        self.conv_layer = nn.Conv2d(1, self.config.out_channels, (self.config.kernel_size, 3))  # kernel size x 3
        self.conv2_bn = nn.BatchNorm2d(self.config.out_channels)
        self.dropout = nn.Dropout(self.config.convkb_drop_prob)
        self.non_linearity = nn.ReLU()
        self.fc_layer = nn.Linear((self.config.hidden_size - self.config.kernel_size + 1) * self.config.out_channels, 1, bias=False)

        self.criterion = nn.Softplus()
        self.init_parameters()