Python pytorch_pretrained_bert.BertModel.from_pretrained() Examples

The following are 18 code examples of pytorch_pretrained_bert.BertModel.from_pretrained(), collected from open-source projects. Each example notes the source file and project it was taken from. You may also want to check out all available functions/classes of the module pytorch_pretrained_bert.BertModel, or try the search function.
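Before the project examples, here is a minimal usage sketch (assuming the pytorch_pretrained_bert package is installed and the 'bert-base-uncased' weights can be downloaded) showing what from_pretrained() returns and how the loaded model is typically called:

import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
model.eval()  # disable dropout for deterministic feature extraction

text = "[CLS] hello world [SEP]"
token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
input_ids = torch.tensor([token_ids])  # batch of one sentence

with torch.no_grad():
    # the forward pass returns (encoded_layers, pooled_output);
    # encoded_layers is a list of per-layer hidden states because
    # output_all_encoded_layers defaults to True
    encoded_layers, pooled_output = model(input_ids)

print(len(encoded_layers), encoded_layers[-1].shape)  # 12 layers, [1, seq_len, 768]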
Example #1
Source File: model.py    From bert-event-extraction with MIT License
def __init__(self, trigger_size=None, entity_size=None, all_postags=None, postag_embedding_dim=50, argument_size=None, entity_embedding_dim=50, device=torch.device("cpu")):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.entity_embed = MultiLabelEmbeddingLayer(num_embeddings=entity_size, embedding_dim=entity_embedding_dim, device=device)
        self.postag_embed = nn.Embedding(num_embeddings=all_postags, embedding_dim=postag_embedding_dim)
        self.rnn = nn.LSTM(bidirectional=True, num_layers=1, input_size=768 + entity_embedding_dim, hidden_size=768 // 2, batch_first=True)

        # hidden_size = 768 + entity_embedding_dim + postag_embedding_dim
        hidden_size = 768
        self.fc1 = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(hidden_size, hidden_size, bias=True),
            nn.ReLU(),
        )
        self.fc_trigger = nn.Sequential(
            nn.Linear(hidden_size, trigger_size),
        )
        self.fc_argument = nn.Sequential(
            nn.Linear(hidden_size * 2, argument_size),
        )
        self.device = device 
Example #2
Source File: bert_pretrained_encoder.py    From lale with Apache License 2.0
def __init__(self, batch_size = 32):
        # Load pre-trained model tokenizer (vocabulary)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.max_seq_length = self.tokenizer.max_len
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = BertModel.from_pretrained('bert-base-uncased')
        self.batch_size = batch_size

    # def fit(self, X, y):
    #     # TODO: Find the right value for max sequence length
    #     return BertPretrainedEncoderImpl() 
Example #3
Source File: BERTFast.py    From TextClassificationBenchmark with MIT License
def __init__(self, opt ):
        super(BERTFast, self).__init__(opt)

        self.bert_model = BertModel.from_pretrained('bert-base-uncased')  
        for param in self.bert_model.parameters():
            param.requires_grad=self.opt.bert_trained
        self.hidden2label = nn.Linear(768, opt.label_size)
        self.properties.update(
                {"bert_trained":self.opt.bert_trained
                }) 
Example #4
Source File: bert_lstm_crf.py    From Bert-BiLSTM-CRF-pytorch with MIT License
def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers, dropout_ratio, dropout1, use_cuda=False):
        super(BERT_LSTM_CRF, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.word_embeds = BertModel.from_pretrained(bert_config)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim,
                            num_layers=rnn_layers, bidirectional=True, dropout=dropout_ratio, batch_first=True)
        self.rnn_layers = rnn_layers
        self.dropout1 = nn.Dropout(p=dropout1)
        self.crf = CRF(target_size=tagset_size, average_batch=True, use_cuda=use_cuda)
        self.liner = nn.Linear(hidden_dim*2, tagset_size+2)
        self.tagset_size = tagset_size 
Example #5
Source File: make_aspect_term_model.py    From MAMS-for-ABSA with Apache License 2.0
def make_bert_capsule_network(config):
    base_path = os.path.join(config['base_path'])
    log_path = os.path.join(base_path, 'log/log.yml')
    log = yaml.safe_load(open(log_path))
    config = config['aspect_term_model'][config['aspect_term_model']['type']]
    bert = BertModel.from_pretrained('bert-base-uncased')
    model = BertCapsuleNetwork(
        bert=bert,
        bert_size=config['bert_size'],
        capsule_size=config['capsule_size'],
        dropout=config['dropout'],
        num_categories=log['num_categories']
    )
    model.load_sentiment(os.path.join(base_path, 'processed/sentiment_matrix.npy'))
    return model 
Example #6
Source File: make_aspect_category_model.py    From MAMS-for-ABSA with Apache License 2.0
def make_bert_capsule_network(config):
    base_path = os.path.join(config['base_path'])
    log_path = os.path.join(base_path, 'log/log.yml')
    log = yaml.safe_load(open(log_path))
    config = config['aspect_category_model'][config['aspect_category_model']['type']]
    bert = BertModel.from_pretrained('bert-base-uncased')
    model = BertCapsuleNetwork(
        bert=bert,
        bert_size=config['bert_size'],
        capsule_size=config['capsule_size'],
        dropout=config['dropout'],
        num_categories=log['num_categories']
    )
    model.load_sentiment(os.path.join(base_path, 'processed/sentiment_matrix.npy'))
    return model 
Example #7
Source File: BERT_Model.py    From bert-sense with MIT License
def __init__(self, device_number='cuda:2', use_cuda = True):
        
        self.device_number = device_number
        self.use_cuda = use_cuda
        
        self.tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        
        self.model = BertModel.from_pretrained('bert-large-uncased')
        self.model.eval()
        
        if use_cuda:
            self.model.to(device_number) 
Example #8
Source File: input_embedding.py    From dstc8-meta-dialog with MIT License
def model(self):
    """lazy model loading"""
    with MODEL_DOWNLOAD_LOCK:
      # use lock to ensure model isn't downloaded by two processes at once
      if not getattr(self, "_model", None):
        self._model = BertModel.from_pretrained('bert-base-uncased')
        self._model.eval()
      assert self._model.config.hidden_size == self.embed_dim
    if cuda_utils.CUDA_ENABLED and self.use_cuda_if_available:
      self._model.cuda()
    return self._model 
Example #9
Source File: input_embedding.py    From dstc8-meta-dialog with MIT License
def tokenizer(self):
    """lazy model loading"""
    with MODEL_DOWNLOAD_LOCK:
      # use lock to ensure model isn't downloaded by two processes at once
      if not getattr(self, "_tokenizer", None):
        self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.pad_token = '[PAD]'
        self.bos_token = '[BOS]'
        self.eos_token = '[EOS]'
        self.unk_token = '[UNK]'
        self.pad_idx = self._tokenizer.vocab[self.pad_token]
        self.unk_idx = self._tokenizer.vocab[self.unk_token]

        # add EOS and BOS tokens to vocab by reusing unused slots
        self._tokenizer.basic_tokenizer.never_split += (self.eos_token, self.bos_token)
        vocab = self._tokenizer.vocab
        oldkey, newkey = '[unused1]', self.bos_token
        vocab = OrderedDict((newkey if k == oldkey else k, v) for k, v in vocab.items())
        oldkey, newkey = '[unused2]', self.eos_token
        vocab = OrderedDict((newkey if k == oldkey else k, v) for k, v in vocab.items())
        self._tokenizer.vocab = vocab
        self._tokenizer.wordpiece_tokenizer.vocab = vocab
        self.bos_idx = vocab[self.bos_token]
        self.eos_idx = vocab[self.eos_token]
        ids_to_tokens = OrderedDict(
            [(ids, tok) for tok, ids in vocab.items()])
        self._tokenizer.ids_to_tokens = ids_to_tokens
        self._tokenizer.wordpiece_tokenizer.ids_to_tokens = ids_to_tokens
    return self._tokenizer 
Example #10
Source File: model_builder.py    From BertSum with Apache License 2.0
def __init__(self, temp_dir, load_pretrained_bert, bert_config):
        super(Bert, self).__init__()
        if(load_pretrained_bert):
            self.model = BertModel.from_pretrained('bert-base-uncased', cache_dir=temp_dir)
        else:
            self.model = BertModel(bert_config) 
Example #11
Source File: evaluate.py    From Counterfactual-StoryRW with MIT License
def eval_semantic_sim_score(instances: List[CFRInstance], bert_model_type="bert-base-uncased"):

    tokenizer = BertTokenizer.from_pretrained(bert_model_type)
    model = BertModel.from_pretrained(bert_model_type)
    model.eval()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    drift_similarities = []

    for instance in instances:
        clean_original_story = _clean_text(instance.original_context + ' ' + instance.original_ending)
        predicted_ending = _clean_text(instance.cf_context + ' ' + instance.predicted_ending)

        original_story_emb = _bert_embed_sentence(clean_original_story, model, tokenizer)
        predicted_ending_emb = _bert_embed_sentence(predicted_ending, model, tokenizer)

        all_sims = []
        for gold_cf in instance.gold_cf_endings:
            clean_gold_cf = _clean_text(instance.cf_context + ' ' + gold_cf)
            gold_cf_emb = _bert_embed_sentence(clean_gold_cf, model, tokenizer)

            all_sims.append(drift_similarity(original_story_emb, predicted_ending_emb, gold_cf_emb))

        drift_similarities.append(np.max(all_sims))

    return {
        "drift_similarity": np.mean(drift_similarities),
        "drift_similarity_by_instance": [float(f) for f in  drift_similarities]
    } 
Example #12
Source File: model.py    From mrqa with Apache License 2.0
def __init__(self, bert_name_or_config, num_classes=6, hidden_size=768,
                 num_layers=3, dropout=0.1, dis_lambda=0.5, concat=False, anneal=False):
        super(DomainQA, self).__init__()
        if isinstance(bert_name_or_config, BertConfig):
            self.bert = BertModel(bert_name_or_config)
        else:
            self.bert = BertModel.from_pretrained("bert-base-uncased")

        self.config = self.bert.config

        self.qa_outputs = nn.Linear(hidden_size, 2)
        # init weight
        self.qa_outputs.weight.data.normal_(mean=0.0, std=0.02)
        self.qa_outputs.bias.data.zero_()
        if concat:
            input_size = 2 * hidden_size
        else:
            input_size = hidden_size
        self.discriminator = DomainDiscriminator(num_classes, input_size, hidden_size, num_layers, dropout)

        self.num_classes = num_classes
        self.dis_lambda = dis_lambda
        self.anneal = anneal
        self.concat = concat
        self.sep_id = 102

    # only for prediction 
Example #13
Source File: bert_classifier.py    From neural_chat with MIT License
def build_model(self):
        """Construct the model."""
        num_classes = len(self.class_list)
        return BertWrapper(BertModel.from_pretrained(self.pretrained_path), num_classes) 
Example #14
Source File: prebert.py    From ner_with_dependency with GNU General Public License v3.0
def load_bert():
    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    model = BertModel.from_pretrained('bert-base-cased')
    model.eval()
    model.to(device)
    return tokenizer, model 
Example #15
Source File: bert_classifier.py    From ParlAI with MIT License
def build_model(self):
        """
        Construct the model.
        """
        num_classes = len(self.class_list)
        return BertWrapper(BertModel.from_pretrained(self.pretrained_path), num_classes) 
Example #16
Source File: bert_servant.py    From combine-FEVER-NSMN with MIT License
def __init__(self, bert_type_name='') -> None:
        super().__init__()
        self.bert_type_name = bert_type_name

        self.bert_tokenizer = BertTokenizer.from_pretrained(self.bert_type_name)

        self.bert_model: BertModel = BertModel.from_pretrained(self.bert_type_name)
        self.bert_model.eval() 
Example #17
Source File: model.py    From DFGN-pytorch with MIT License
def __init__(self, top_rnns=False, vocab_size=None, device='cpu', finetuning=False):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')

        self.top_rnns=top_rnns
        if top_rnns:
            self.rnn = nn.LSTM(bidirectional=True, num_layers=2, input_size=768, hidden_size=768//2, batch_first=True)
        self.fc = nn.Linear(768, vocab_size)

        self.device = device
        self.finetuning = finetuning 
Example #18
Source File: parse_nk.py    From self-attentive-parser with MIT License
def get_bert(bert_model, bert_do_lower_case):
    # Avoid a hard dependency on BERT by only importing it if it's being used
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    if bert_model.endswith('.tar.gz'):
        tokenizer = BertTokenizer.from_pretrained(bert_model.replace('.tar.gz', '-vocab.txt'), do_lower_case=bert_do_lower_case)
    else:
        tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=bert_do_lower_case)
    bert = BertModel.from_pretrained(bert_model)
    return tokenizer, bert