Python pytorch_pretrained_bert.modeling.BertLayerNorm() Examples
The following are 12 code examples of pytorch_pretrained_bert.modeling.BertLayerNorm().
Each example is taken from the open-source project and source file noted above it.
You may also want to check out all available functions/classes of the module pytorch_pretrained_bert.modeling.
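As a quick orientation before the examples: BertLayerNorm is constructed with a hidden size and an epsilon, and it normalizes activations over the last dimension. A minimal sketch (the hidden size of 768 and the tensor shapes are illustrative, not taken from any of the projects below):

import torch
from pytorch_pretrained_bert.modeling import BertLayerNorm

layer_norm = BertLayerNorm(768, eps=1e-12)   # hidden size is illustrative
hidden = torch.randn(2, 5, 768)              # (batch, sequence length, hidden size)
normalized = layer_norm(hidden)              # same shape, normalized over the last dimension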
Example #1
Source File: matcher.py From gobbli with Apache License 2.0
def _my_init(self):
    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.bert_config.initializer_range * self.opt['init_ratio'])
        elif isinstance(module, BertLayerNorm):
            # Slightly different from the BERT pytorch version, which should be a bug.
            # Note that it only affects on training from scratch. For detailed discussions, please contact xiaodl@.
            # Layer normalization (https://arxiv.org/abs/1607.06450)
            # support both old/latest version
            if 'beta' in dir(module) and 'gamma' in dir(module):
                module.beta.data.zero_()
                module.gamma.data.fill_(1.0)
            else:
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)
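The 'beta'/'gamma' check above exists because older releases of pytorch_pretrained_bert name the layer-norm parameters gamma and beta, while later releases use the standard weight and bias names. A minimal sketch to see which naming a given instance exposes (the hidden size is illustrative):

from pytorch_pretrained_bert.modeling import BertLayerNorm

ln = BertLayerNorm(768, eps=1e-12)
# Older versions expose gamma/beta; newer ones expose weight/bias.
if hasattr(ln, 'gamma') and hasattr(ln, 'beta'):
    print('old-style parameters: gamma/beta')
else:
    print('new-style parameters: weight/bias')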
Example #2
Source File: san_model.py From MT-DNN with MIT License
def __init__(self, num_hid, bidirect, dropout, rnn_type):
    super().__init__()
    assert isinstance(rnn_type, str)
    rnn_type = rnn_type.upper()
    assert rnn_type == "LSTM" or rnn_type == "GRU"
    rnn_cls = getattr(nn, rnn_type)
    self._rnn = rnn_cls(
        num_hid,
        num_hid,
        1,
        bidirectional=bidirect,
        dropout=dropout,
        batch_first=True,
    )
    self._layer_norm = BertLayerNorm(num_hid, eps=1e-12)
    self.rnn_type = rnn_type
    self.num_hid = num_hid
    self.ndirections = 1 + int(bidirect)
Example #3
Source File: san.py From MT-DNN with MIT License
def _my_init(self):
    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02 * self.config.init_ratio)
        elif isinstance(module, BertLayerNorm):
            # Slightly different from the BERT pytorch version, which should be a bug.
            # Note that it only affects on training from scratch. For detailed discussions, please contact xiaodl@.
            # Layer normalization (https://arxiv.org/abs/1607.06450)
            # support both old/latest version
            if "beta" in dir(module) and "gamma" in dir(module):
                module.beta.data.zero_()
                module.gamma.data.fill_(1.0)
            else:
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)
Example #4
Source File: evidence_pooling.py From gap with MIT License
def __init__(self, config):
    super().__init__()
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
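This constructor only defines the layers. A typical forward pass for a dense + dropout + LayerNorm block like this mirrors the BertSelfOutput pattern in pytorch_pretrained_bert; the sketch below, including the residual connection, is an assumption based on that pattern and is not taken from evidence_pooling.py itself:

# Hypothetical forward for the block above, modeled on the BertSelfOutput pattern;
# the residual add over input_tensor is an assumption.
def forward(self, hidden_states, input_tensor):
    hidden_states = self.dense(hidden_states)
    hidden_states = self.dropout(hidden_states)
    hidden_states = self.LayerNorm(hidden_states + input_tensor)
    return hidden_states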
Example #5
Source File: bert_v0_1.py From semanticRetrievalMRS with MIT License
def init_bert_weights(module, initializer_range):
    """ Initialize the weights. """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()
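Helpers like this one are normally applied to every submodule of a model with nn.Module.apply. A minimal sketch, assuming an older pytorch_pretrained_bert whose BertLayerNorm exposes the gamma/beta parameters that this helper expects (the toy model and the 0.02 range are illustrative):

import functools
import torch.nn as nn
from pytorch_pretrained_bert.modeling import BertLayerNorm

# Hypothetical model; any nn.Module containing Linear/BertLayerNorm submodules works.
model = nn.Sequential(nn.Linear(768, 768), BertLayerNorm(768, eps=1e-12))

# nn.Module.apply calls the function once per submodule, so the whole tree is initialized.
model.apply(functools.partial(init_bert_weights, initializer_range=0.02))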
Example #6
Source File: hotpot_bert_v0.py From semanticRetrievalMRS with MIT License
def init_bert_weights(module, initializer_range):
    """ Initialize the weights. """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()
Example #7
Source File: bert_multilayer_output.py From semanticRetrievalMRS with MIT License
def init_bert_weights(module):
    """ Initialize the weights. """
    initializer_range = 0.02
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()
Example #8
Source File: bert_maxout_clf.py From semanticRetrievalMRS with MIT License
def init_bert_weights(module):
    """ Initialize the weights. """
    initializer_range = 0.02
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()
Example #9
Source File: bert_span_v0.py From semanticRetrievalMRS with MIT License
def init_bert_weights(module, initializer_range):
    """ Initialize the weights. """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.beta.data.normal_(mean=0.0, std=initializer_range)
        module.gamma.data.normal_(mean=0.0, std=initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()
Example #10
Source File: knowbert.py From kb with Apache License 2.0
def __init__(self,
             vocab: Vocabulary,
             entity_linker: Model,
             span_attention_config: Dict[str, int],
             should_init_kg_to_bert_inverse: bool = True,
             freeze: bool = False,
             regularizer: RegularizerApplicator = None):
    super().__init__(vocab, regularizer)

    self.entity_linker = entity_linker
    self.entity_embedding_dim = self.entity_linker.disambiguator.entity_embedding_dim
    self.contextual_embedding_dim = self.entity_linker.disambiguator.contextual_embedding_dim

    self.weighted_entity_layer_norm = BertLayerNorm(self.entity_embedding_dim, eps=1e-5)
    init_bert_weights(self.weighted_entity_layer_norm, 0.02)

    self.dropout = torch.nn.Dropout(0.1)

    # the span attention layers
    assert len(span_attention_config) == 4
    config = BertConfig(
        0,  # vocab size, not used
        hidden_size=span_attention_config['hidden_size'],
        num_hidden_layers=span_attention_config['num_hidden_layers'],
        num_attention_heads=span_attention_config['num_attention_heads'],
        intermediate_size=span_attention_config['intermediate_size']
    )
    self.span_attention_layer = SpanAttentionLayer(config)
    # already init inside span attention layer

    # for the output!
    self.output_layer_norm = BertLayerNorm(self.contextual_embedding_dim, eps=1e-5)

    self.kg_to_bert_projection = torch.nn.Linear(
        self.entity_embedding_dim, self.contextual_embedding_dim
    )

    self.should_init_kg_to_bert_inverse = should_init_kg_to_bert_inverse
    self._init_kg_to_bert_projection()

    self._freeze_all = freeze
Example #11
Source File: NER_BERT_CRF.py From NER-BERT-CRF with MIT License
def init_bert_weights(self, module):
    """ Initialize the weights. """
    if isinstance(module, (nn.Linear, nn.Embedding)):
        # Slightly different from the TF version which uses truncated_normal for initialization
        # cf https://github.com/pytorch/pytorch/pull/5617
        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
    elif isinstance(module, BertLayerNorm):
        module.bias.data.zero_()
        module.weight.data.fill_(1.0)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()
Example #12
Source File: san_model.py From mt-dnn with MIT License
def __init__(self, num_hid, bidirect, dropout, rnn_type):
    super().__init__()
    assert isinstance(rnn_type, str)
    rnn_type = rnn_type.upper()
    assert rnn_type == 'LSTM' or rnn_type == 'GRU'
    rnn_cls = getattr(nn, rnn_type)
    self._rnn = rnn_cls(num_hid, num_hid, 1,
                        bidirectional=bidirect,
                        dropout=dropout,
                        batch_first=True)
    self._layer_norm = BertLayerNorm(num_hid, eps=1e-12)
    self.rnn_type = rnn_type
    self.num_hid = num_hid
    self.ndirections = 1 + int(bidirect)