Python pytorch_pretrained_bert.modeling.BertLayerNorm() Examples

The following are 12 code examples of pytorch_pretrained_bert.modeling.BertLayerNorm(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pytorch_pretrained_bert.modeling, or try the search function.
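BertLayerNorm is a layer-normalization module: it is constructed with a hidden size and an epsilon, and normalizes its input over the last (hidden) dimension. The sketch below shows a minimal use; the hidden size of 768 and the input shape are illustrative assumptions, not values taken from the examples that follow.

import torch
from pytorch_pretrained_bert.modeling import BertLayerNorm

# Layer norm over the last (hidden) dimension; 768 matches BERT-base and
# eps=1e-12 is the value passed in several of the examples below.
layer_norm = BertLayerNorm(768, eps=1e-12)

hidden_states = torch.randn(2, 16, 768)  # (batch, sequence, hidden), illustrative shape
normalized = layer_norm(hidden_states)   # output has the same shape as the input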
Example #1
Source File: matcher.py    From gobbli with Apache License 2.0    6 votes
def _my_init(self):
        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=self.bert_config.initializer_range * self.opt['init_ratio'])
            elif isinstance(module, BertLayerNorm):
                # Slightly different from the BERT pytorch version, which is likely a bug.
                # Note that it only affects training from scratch. For detailed discussions, please contact xiaodl@.
                # Layer normalization (https://arxiv.org/abs/1607.06450)
                # Support both the old (beta/gamma) and the latest (weight/bias) parameter names.
                if 'beta' in dir(module) and 'gamma' in dir(module):
                    module.beta.data.zero_()
                    module.gamma.data.fill_(1.0)
                else:
                    module.bias.data.zero_()
                    module.weight.data.fill_(1.0)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
        self.apply(init_weights) 
Example #2
Source File: san_model.py    From MT-DNN with MIT License    6 votes
def __init__(self, num_hid, bidirect, dropout, rnn_type):
        super().__init__()

        assert isinstance(rnn_type, str)
        rnn_type = rnn_type.upper()
        assert rnn_type == "LSTM" or rnn_type == "GRU"
        rnn_cls = getattr(nn, rnn_type)
        self._rnn = rnn_cls(
            num_hid,
            num_hid,
            1,
            bidirectional=bidirect,
            dropout=dropout,
            batch_first=True,
        )
        self._layer_norm = BertLayerNorm(num_hid, eps=1e-12)
        self.rnn_type = rnn_type
        self.num_hid = num_hid
        self.ndirections = 1 + int(bidirect) 
Example #3
Source File: san.py    From MT-DNN with MIT License    6 votes
def _my_init(self):
        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0, std=0.02 * self.config.init_ratio)
            elif isinstance(module, BertLayerNorm):
                # Slightly different from the BERT pytorch version, which is likely a bug.
                # Note that it only affects training from scratch. For detailed discussions, please contact xiaodl@.
                # Layer normalization (https://arxiv.org/abs/1607.06450)
                # Support both the old (beta/gamma) and the latest (weight/bias) parameter names.
                if "beta" in dir(module) and "gamma" in dir(module):
                    module.beta.data.zero_()
                    module.gamma.data.fill_(1.0)
                else:
                    module.bias.data.zero_()
                    module.weight.data.fill_(1.0)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights) 
Example #4
Source File: evidence_pooling.py    From gap with MIT License    5 votes
def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob) 
Example #5
Source File: bert_v0_1.py    From semanticRetrievalMRS with MIT License    5 votes
def init_bert_weights(module, initializer_range):
    """ Initialize the weights.
    """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_() 
Example #6
Source File: hotpot_bert_v0.py    From semanticRetrievalMRS with MIT License    5 votes
def init_bert_weights(module, initializer_range):
    """ Initialize the weights.
    """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_() 
Example #7
Source File: bert_multilayer_output.py    From semanticRetrievalMRS with MIT License    5 votes
def init_bert_weights(module):
    """ Initialize the weights.
    """
    initializer_range = 0.02
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_() 
Example #8
Source File: bert_maxout_clf.py    From semanticRetrievalMRS with MIT License    5 votes
def init_bert_weights(module):
    """ Initialize the weights.
    """
    initializer_range = 0.02
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_() 
Example #9
Source File: bert_span_v0.py    From semanticRetrievalMRS with MIT License    5 votes
def init_bert_weights(module, initializer_range):
    """ Initialize the weights.
    """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_() 
Example #10
Source File: knowbert.py    From kb with Apache License 2.0    5 votes
def __init__(self,
                 vocab: Vocabulary,
                 entity_linker: Model,
                 span_attention_config: Dict[str, int],
                 should_init_kg_to_bert_inverse: bool = True,
                 freeze: bool = False,
                 regularizer: RegularizerApplicator = None):
        super().__init__(vocab, regularizer)

        self.entity_linker = entity_linker
        self.entity_embedding_dim = self.entity_linker.disambiguator.entity_embedding_dim
        self.contextual_embedding_dim = self.entity_linker.disambiguator.contextual_embedding_dim

        self.weighted_entity_layer_norm = BertLayerNorm(self.entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.weighted_entity_layer_norm, 0.02)

        self.dropout = torch.nn.Dropout(0.1)

        # the span attention layers
        assert len(span_attention_config) == 4
        config = BertConfig(
            0, # vocab size, not used
            hidden_size=span_attention_config['hidden_size'],
            num_hidden_layers=span_attention_config['num_hidden_layers'],
            num_attention_heads=span_attention_config['num_attention_heads'],
            intermediate_size=span_attention_config['intermediate_size']
        )
        self.span_attention_layer = SpanAttentionLayer(config)
        # already init inside span attention layer

        # for the output!
        self.output_layer_norm = BertLayerNorm(self.contextual_embedding_dim, eps=1e-5)

        self.kg_to_bert_projection = torch.nn.Linear(
                self.entity_embedding_dim, self.contextual_embedding_dim
        )

        self.should_init_kg_to_bert_inverse = should_init_kg_to_bert_inverse
        self._init_kg_to_bert_projection()

        self._freeze_all = freeze 
Example #11
Source File: NER_BERT_CRF.py    From NER-BERT-CRF with MIT License    5 votes
def init_bert_weights(self, module):
        """ Initialize the weights.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)): 
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, BertLayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_() 
Example #12
Source File: san_model.py    From mt-dnn with MIT License    5 votes
def __init__(self, num_hid, bidirect, dropout, rnn_type):
        super().__init__()

        assert isinstance(rnn_type, str)
        rnn_type = rnn_type.upper()
        assert rnn_type == 'LSTM' or rnn_type == 'GRU'
        rnn_cls = getattr(nn, rnn_type)
        self._rnn = rnn_cls(num_hid, num_hid, 1,
                bidirectional=bidirect,
                dropout=dropout,
                batch_first=True)
        self._layer_norm = BertLayerNorm(num_hid, eps=1e-12)
        self.rnn_type = rnn_type
        self.num_hid = num_hid
        self.ndirections = 1 + int(bidirect)