Python pytorch_pretrained_bert.modeling.BertConfig() Examples

The following are 8 code examples of pytorch_pretrained_bert.modeling.BertConfig(). You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module pytorch_pretrained_bert.modeling, or try the search function.
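Before the examples, here is a minimal sketch of constructing a BertConfig directly; it is not taken from any of the projects below, and the vocabulary size (30522, the bert-base-uncased vocabulary) and the bert_config.json path are illustrative placeholders. The first positional argument, vocab_size_or_config_json_file, accepts either an integer vocabulary size or a path to a JSON config file; the remaining hyperparameters are keyword arguments.

from pytorch_pretrained_bert.modeling import BertConfig

# Build a config programmatically; 30522 is the bert-base-uncased vocabulary size.
config = BertConfig(
    vocab_size_or_config_json_file=30522,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act='gelu',
)

# Or load the same settings from a JSON file (the path here is a placeholder).
config = BertConfig.from_json_file('bert_config.json')
print(config.to_json_string())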
Example #1
Source File: test_span_attention_layer.py    From kb with Apache License 2.0
def test_span_word_attention(self):
        config_file = 'tests/fixtures/bert/bert_config.json'
        with open(config_file) as fin:
            json_config = json.load(fin)

        vocab_size = json_config.pop("vocab_size")
        config = BertConfig(vocab_size, **json_config)

        span_attn = SpanWordAttention(config)

        batch_size = 7
        timesteps = 29
        hidden_states = torch.rand(batch_size, timesteps, config.hidden_size)

        num_entity_embeddings = 11
        entity_embeddings = torch.rand(batch_size, num_entity_embeddings, config.hidden_size)
        entity_mask = entity_embeddings[:, :, 0] > 0.5

        # avoid shadowing the SpanWordAttention module with its own output
        span_attn_output, attention_probs = span_attn(hidden_states, entity_embeddings, entity_mask)
        self.assertEqual(list(span_attn_output.shape), [batch_size, timesteps, config.hidden_size])
Example #2
Source File: test_span_attention_layer.py    From kb with Apache License 2.0
def test_span_attention_layer(self):
        config_file = 'tests/fixtures/bert/bert_config.json'
        with open(config_file) as fin:
            json_config = json.load(fin)

        vocab_size = json_config.pop("vocab_size")
        config = BertConfig(vocab_size, **json_config)
    
        batch_size = 7
        timesteps = 29
        hidden_states = torch.rand(batch_size, timesteps, config.hidden_size)
    
        num_entity_embeddings = 11
        entity_embeddings = torch.rand(batch_size, num_entity_embeddings, config.hidden_size)
        entity_mask = entity_embeddings[:, :, 0] > 0.5
    
        span_attention_layer = SpanAttentionLayer(config)
    
        output = span_attention_layer(hidden_states, entity_embeddings, entity_mask)

        self.assertEqual(list(output["output"].shape), [batch_size, timesteps, config.hidden_size]) 
Example #3
Source File: helpers.py    From ParlAI with MIT License
def __init__(
        self,
        bert_model,
        output_dim,
        add_transformer_layer=False,
        layer_pulled=-1,
        aggregation="first",
    ):
        super(BertWrapper, self).__init__()
        self.layer_pulled = layer_pulled
        self.aggregation = aggregation
        self.add_transformer_layer = add_transformer_layer
        # deduce bert output dim from the size of embeddings
        bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)

        if add_transformer_layer:
            config_for_one_layer = BertConfig(
                0,  # vocab size, not used: only a BertLayer is built from this config
                hidden_size=bert_output_dim,
                num_attention_heads=int(bert_output_dim / 64),
                intermediate_size=3072,
                hidden_act='gelu',
            )
            self.additional_transformer_layer = BertLayer(config_for_one_layer)
        self.additional_linear_layer = torch.nn.Linear(bert_output_dim, output_dim)
        self.bert_model = bert_model 
Example #4
Source File: helpers.py    From neural_chat with MIT License
def __init__(
        self,
        bert_model,
        output_dim,
        add_transformer_layer=False,
        layer_pulled=-1,
        aggregation="first",
    ):
        super(BertWrapper, self).__init__()
        self.layer_pulled = layer_pulled
        self.aggregation = aggregation
        self.add_transformer_layer = add_transformer_layer
        # deduce bert output dim from the size of embeddings
        bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)

        if add_transformer_layer:
            config_for_one_layer = BertConfig(
                0,  # vocab size, not used: only a BertLayer is built from this config
                hidden_size=bert_output_dim,
                num_attention_heads=int(bert_output_dim / 64),
                intermediate_size=3072,
                hidden_act='gelu',
            )
            self.additional_transformer_layer = BertLayer(config_for_one_layer)
        self.additional_linear_layer = torch.nn.Linear(bert_output_dim, output_dim)
        self.bert_model = bert_model 
Example #5
Source File: adv_masker.py    From bert_on_stilts with Apache License 2.0
def __init__(self, vocab_size, original_hidden_size, num_layers, tau=1):
        super().__init__()
        self.bert_layer = BertLayer(BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            hidden_size=original_hidden_size * num_layers,
        ))
        self.linear_layer = nn.Linear(original_hidden_size * num_layers, 1)
        self.log_sigmoid = nn.LogSigmoid()
        self.tau = tau 
Example #6
Source File: knowbert.py    From kb with Apache License 2.0
def __init__(self,
                 vocab: Vocabulary,
                 entity_linker: Model,
                 span_attention_config: Dict[str, int],
                 should_init_kg_to_bert_inverse: bool = True,
                 freeze: bool = False,
                 regularizer: RegularizerApplicator = None):
        super().__init__(vocab, regularizer)

        self.entity_linker = entity_linker
        self.entity_embedding_dim = self.entity_linker.disambiguator.entity_embedding_dim
        self.contextual_embedding_dim = self.entity_linker.disambiguator.contextual_embedding_dim

        self.weighted_entity_layer_norm = BertLayerNorm(self.entity_embedding_dim, eps=1e-5)
        init_bert_weights(self.weighted_entity_layer_norm, 0.02)

        self.dropout = torch.nn.Dropout(0.1)

        # the span attention layers
        assert len(span_attention_config) == 4
        config = BertConfig(
            0, # vocab size, not used
            hidden_size=span_attention_config['hidden_size'],
            num_hidden_layers=span_attention_config['num_hidden_layers'],
            num_attention_heads=span_attention_config['num_attention_heads'],
            intermediate_size=span_attention_config['intermediate_size']
        )
        self.span_attention_layer = SpanAttentionLayer(config)
        # already init inside span attention layer

        # for the output!
        self.output_layer_norm = BertLayerNorm(self.contextual_embedding_dim, eps=1e-5)

        self.kg_to_bert_projection = torch.nn.Linear(
                self.entity_embedding_dim, self.contextual_embedding_dim
        )

        self.should_init_kg_to_bert_inverse = should_init_kg_to_bert_inverse
        self._init_kg_to_bert_projection()

        self._freeze_all = freeze 
Example #7
Source File: san_model.py    From MT-DNN with MIT License
def __init__(self, config: BertConfig):
        super().__init__()
        self.embeddings = BertEmbeddings(config)
        self.encoder = SanEncoder(
            config.hidden_size,
            config.num_hidden_layers,
            True,
            config.hidden_dropout_prob,
        )
        self.pooler = SanPooler(config.hidden_size, config.hidden_dropout_prob)
        self.config = config 
Example #8
Source File: san_model.py    From mt-dnn with MIT License
def __init__(self, config: BertConfig):
        super().__init__()
        self.embeddings = BertEmbeddings(config)
        self.encoder = SanEncoder(
            config.hidden_size,
            config.num_hidden_layers,
            True,
            config.hidden_dropout_prob,
        )
        self.pooler = SanPooler(config.hidden_size, config.hidden_dropout_prob)
        self.config = config