Python pytorch_pretrained_bert.BertModel.from_pretrained() Examples
The following are 18 code examples of pytorch_pretrained_bert.BertModel.from_pretrained(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pytorch_pretrained_bert.BertModel, or try the search function.
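Before the project examples, here is a minimal, self-contained sketch of the typical from_pretrained() workflow in the legacy pytorch_pretrained_bert package. The model name 'bert-base-uncased' and the sample sentence are arbitrary choices for illustration.

import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel

# Download (or load from cache) the pre-trained tokenizer and model weights.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
model.eval()  # disable dropout for deterministic feature extraction

# Tokenize a sentence and map tokens to vocabulary ids.
tokens = ['[CLS]'] + tokenizer.tokenize('Hello, BERT!') + ['[SEP]']
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])

# Forward pass; the legacy API returns all encoder layers plus the pooled [CLS] output.
with torch.no_grad():
    encoded_layers, pooled_output = model(input_ids)
print(encoded_layers[-1].shape)  # (1, sequence_length, 768)

The examples below show how real projects wrap this same call inside models, encoders, and evaluation scripts.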
Example #1
Source File: model.py From bert-event-extraction with MIT License | 6 votes |
def __init__(self, trigger_size=None, entity_size=None, all_postags=None, postag_embedding_dim=50,
             argument_size=None, entity_embedding_dim=50, device=torch.device("cpu")):
    super().__init__()
    self.bert = BertModel.from_pretrained('bert-base-cased')

    self.entity_embed = MultiLabelEmbeddingLayer(num_embeddings=entity_size,
                                                 embedding_dim=entity_embedding_dim, device=device)
    self.postag_embed = nn.Embedding(num_embeddings=all_postags, embedding_dim=postag_embedding_dim)
    self.rnn = nn.LSTM(bidirectional=True, num_layers=1, input_size=768 + entity_embedding_dim,
                       hidden_size=768 // 2, batch_first=True)

    # hidden_size = 768 + entity_embedding_dim + postag_embedding_dim
    hidden_size = 768
    self.fc1 = nn.Sequential(
        # nn.Dropout(0.5),
        nn.Linear(hidden_size, hidden_size, bias=True),
        nn.ReLU(),
    )
    self.fc_trigger = nn.Sequential(
        nn.Linear(hidden_size, trigger_size),
    )
    self.fc_argument = nn.Sequential(
        nn.Linear(hidden_size * 2, argument_size),
    )
    self.device = device
Example #2
Source File: bert_pretrained_encoder.py From lale with Apache License 2.0 | 5 votes |
def __init__(self, batch_size=32):
    # Load pre-trained model tokenizer (vocabulary)
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    self.max_seq_length = self.tokenizer.max_len
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = BertModel.from_pretrained('bert-base-uncased')
    self.batch_size = batch_size

# def fit(self, X, y):
#     # TODO: Find the right value for max sequence length
#     return BertPretrainedEncoderImpl()
Example #3
Source File: BERTFast.py From TextClassificationBenchmark with MIT License | 5 votes |
def __init__(self, opt):
    super(BERTFast, self).__init__(opt)
    self.bert_model = BertModel.from_pretrained('bert-base-uncased')
    for param in self.bert_model.parameters():
        param.requires_grad = self.opt.bert_trained
    self.hidden2label = nn.Linear(768, opt.label_size)
    self.properties.update({"bert_trained": self.opt.bert_trained})
Example #4
Source File: bert_lstm_crf.py From Bert-BiLSTM-CRF-pytorch with MIT License | 5 votes |
def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers,
             dropout_ratio, dropout1, use_cuda=False):
    super(BERT_LSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.word_embeds = BertModel.from_pretrained(bert_config)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim,
                        num_layers=rnn_layers, bidirectional=True,
                        dropout=dropout_ratio, batch_first=True)
    self.rnn_layers = rnn_layers
    self.dropout1 = nn.Dropout(p=dropout1)
    self.crf = CRF(target_size=tagset_size, average_batch=True, use_cuda=use_cuda)
    self.liner = nn.Linear(hidden_dim * 2, tagset_size + 2)
    self.tagset_size = tagset_size
Example #5
Source File: make_aspect_term_model.py From MAMS-for-ABSA with Apache License 2.0 | 5 votes |
def make_bert_capsule_network(config):
    base_path = os.path.join(config['base_path'])
    log_path = os.path.join(base_path, 'log/log.yml')
    log = yaml.safe_load(open(log_path))
    config = config['aspect_term_model'][config['aspect_term_model']['type']]
    bert = BertModel.from_pretrained('bert-base-uncased')
    model = BertCapsuleNetwork(
        bert=bert,
        bert_size=config['bert_size'],
        capsule_size=config['capsule_size'],
        dropout=config['dropout'],
        num_categories=log['num_categories']
    )
    model.load_sentiment(os.path.join(base_path, 'processed/sentiment_matrix.npy'))
    return model
Example #6
Source File: make_aspect_category_model.py From MAMS-for-ABSA with Apache License 2.0 | 5 votes |
def make_bert_capsule_network(config):
    base_path = os.path.join(config['base_path'])
    log_path = os.path.join(base_path, 'log/log.yml')
    log = yaml.safe_load(open(log_path))
    config = config['aspect_category_model'][config['aspect_category_model']['type']]
    bert = BertModel.from_pretrained('bert-base-uncased')
    model = BertCapsuleNetwork(
        bert=bert,
        bert_size=config['bert_size'],
        capsule_size=config['capsule_size'],
        dropout=config['dropout'],
        num_categories=log['num_categories']
    )
    model.load_sentiment(os.path.join(base_path, 'processed/sentiment_matrix.npy'))
    return model
Example #7
Source File: BERT_Model.py From bert-sense with MIT License | 5 votes |
def __init__(self, device_number='cuda:2', use_cuda=True):
    self.device_number = device_number
    self.use_cuda = use_cuda

    self.tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
    self.model = BertModel.from_pretrained('bert-large-uncased')
    self.model.eval()

    if use_cuda:
        self.model.to(device_number)
Example #8
Source File: input_embedding.py From dstc8-meta-dialog with MIT License | 5 votes |
def model(self):
    """lazy model loading"""
    with MODEL_DOWNLOAD_LOCK:  # use lock to ensure model isn't downloaded by two processes at once
        if not getattr(self, "_model", None):
            self._model = BertModel.from_pretrained('bert-base-uncased')
            self._model.eval()
            assert self._model.config.hidden_size == self.embed_dim
            if cuda_utils.CUDA_ENABLED and self.use_cuda_if_available:
                self._model.cuda()
    return self._model
Example #9
Source File: input_embedding.py From dstc8-meta-dialog with MIT License | 5 votes |
def tokenizer(self):
    """lazy model loading"""
    with MODEL_DOWNLOAD_LOCK:  # use lock to ensure model isn't downloaded by two processes at once
        if not getattr(self, "_tokenizer", None):
            self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            self.pad_token = '[PAD]'
            self.bos_token = '[BOS]'
            self.eos_token = '[EOS]'
            self.unk_token = '[UNK]'
            self.pad_idx = self._tokenizer.vocab[self.pad_token]
            self.unk_idx = self._tokenizer.vocab[self.unk_token]

            # add EOS and BOS tokens to vocab by reusing unused slots
            self._tokenizer.basic_tokenizer.never_split += (self.eos_token, self.bos_token)
            vocab = self._tokenizer.vocab
            oldkey, newkey = '[unused1]', self.bos_token
            vocab = OrderedDict((newkey if k == oldkey else k, v) for k, v in vocab.items())
            oldkey, newkey = '[unused2]', self.eos_token
            vocab = OrderedDict((newkey if k == oldkey else k, v) for k, v in vocab.items())
            self._tokenizer.vocab = vocab
            self._tokenizer.wordpiece_tokenizer.vocab = vocab
            self.bos_idx = vocab[self.bos_token]
            self.eos_idx = vocab[self.eos_token]

            ids_to_tokens = OrderedDict(
                [(ids, tok) for tok, ids in vocab.items()])
            self._tokenizer.ids_to_tokens = ids_to_tokens
            self._tokenizer.wordpiece_tokenizer.ids_to_tokens = ids_to_tokens
    return self._tokenizer
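The two lazy accessors in Examples #8 and #9 depend on a module-level download lock and are normally exposed as properties. As a rough orientation, the surrounding wiring might look like the sketch below; the class name, lock object, and constructor arguments here are illustrative assumptions, not the project's actual code.

from multiprocessing import Lock

from pytorch_pretrained_bert import BertModel

# Hypothetical module-level lock: shared by all instances so only one process
# downloads the pre-trained weights at a time.
MODEL_DOWNLOAD_LOCK = Lock()

class LazyBertEmbedder:
    def __init__(self, embed_dim=768, use_cuda_if_available=True):
        self.embed_dim = embed_dim
        self.use_cuda_if_available = use_cuda_if_available

    @property
    def model(self):
        # Load the model only on first access, under the shared lock.
        with MODEL_DOWNLOAD_LOCK:
            if not getattr(self, "_model", None):
                self._model = BertModel.from_pretrained('bert-base-uncased')
                self._model.eval()
        return self._model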
Example #10
Source File: model_builder.py From BertSum with Apache License 2.0 | 5 votes |
def __init__(self, temp_dir, load_pretrained_bert, bert_config):
    super(Bert, self).__init__()
    if load_pretrained_bert:
        self.model = BertModel.from_pretrained('bert-base-uncased', cache_dir=temp_dir)
    else:
        self.model = BertModel(bert_config)
Example #11
Source File: evaluate.py From Counterfactual-StoryRW with MIT License | 5 votes |
def eval_semantic_sim_score(instances: List[CFRInstance], bert_model_type="bert-base-uncased"):
    tokenizer = BertTokenizer.from_pretrained(bert_model_type)
    model = BertModel.from_pretrained(bert_model_type)
    model.eval()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    drift_similarities = []
    for instance in instances:
        clean_original_story = _clean_text(instance.original_context + ' ' + instance.original_ending)
        predicted_ending = _clean_text(instance.cf_context + ' ' + instance.predicted_ending)

        original_story_emb = _bert_embed_sentence(clean_original_story, model, tokenizer)
        predicted_ending_emb = _bert_embed_sentence(predicted_ending, model, tokenizer)

        all_sims = []
        for gold_cf in instance.gold_cf_endings:
            clean_gold_cf = _clean_text(instance.cf_context + ' ' + gold_cf)
            gold_cf_emb = _bert_embed_sentence(clean_gold_cf, model, tokenizer)
            all_sims.append(drift_similarity(original_story_emb, predicted_ending_emb, gold_cf_emb))
        drift_similarities.append(np.max(all_sims))

    return {
        "drift_similarity": np.mean(drift_similarities),
        "drift_similarity_by_instance": [float(f) for f in drift_similarities]
    }
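The helpers _clean_text, _bert_embed_sentence, and drift_similarity are defined elsewhere in that project and are not shown in the excerpt. For readers who only want to see what a sentence-embedding helper built on from_pretrained() could look like, here is a hypothetical sketch, not the project's actual implementation; mean-pooling the final encoder layer is an assumption made purely for illustration.

import torch

def _bert_embed_sentence(sentence, model, tokenizer):
    # Illustrative assumption: embed a sentence as the mean of its final-layer token vectors.
    tokens = ['[CLS]'] + tokenizer.tokenize(sentence) + ['[SEP]']
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
    input_ids = input_ids.to(next(model.parameters()).device)
    with torch.no_grad():
        encoded_layers, _ = model(input_ids)  # list of per-layer tensors in the legacy API
    return encoded_layers[-1].squeeze(0).mean(dim=0).cpu().numpy()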
Example #12
Source File: model.py From mrqa with Apache License 2.0 | 5 votes |
def __init__(self, bert_name_or_config, num_classes=6, hidden_size=768,
             num_layers=3, dropout=0.1, dis_lambda=0.5, concat=False, anneal=False):
    super(DomainQA, self).__init__()
    if isinstance(bert_name_or_config, BertConfig):
        self.bert = BertModel(bert_name_or_config)
    else:
        self.bert = BertModel.from_pretrained("bert-base-uncased")
    self.config = self.bert.config

    self.qa_outputs = nn.Linear(hidden_size, 2)
    # init weight
    self.qa_outputs.weight.data.normal_(mean=0.0, std=0.02)
    self.qa_outputs.bias.data.zero_()

    if concat:
        input_size = 2 * hidden_size
    else:
        input_size = hidden_size
    self.discriminator = DomainDiscriminator(num_classes, input_size, hidden_size, num_layers, dropout)

    self.num_classes = num_classes
    self.dis_lambda = dis_lambda
    self.anneal = anneal
    self.concat = concat
    self.sep_id = 102  # only for prediction
Example #13
Source File: bert_classifier.py From neural_chat with MIT License | 5 votes |
def build_model(self):
    """Construct the model."""
    num_classes = len(self.class_list)
    return BertWrapper(BertModel.from_pretrained(self.pretrained_path), num_classes)
Example #14
Source File: prebert.py From ner_with_dependency with GNU General Public License v3.0 | 5 votes |
def load_bert():
    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    model = BertModel.from_pretrained('bert-base-cased')
    model.eval()
    model.to(device)
    return tokenizer, model
Example #15
Source File: bert_classifier.py From ParlAI with MIT License | 5 votes |
def build_model(self):
    """
    Construct the model.
    """
    num_classes = len(self.class_list)
    return BertWrapper(BertModel.from_pretrained(self.pretrained_path), num_classes)
Example #16
Source File: bert_servant.py From combine-FEVER-NSMN with MIT License | 5 votes |
def __init__(self, bert_type_name='') -> None:
    super().__init__()
    self.bert_type_name = bert_type_name
    self.bert_tokenizer = BertTokenizer.from_pretrained(self.bert_type_name)
    self.bert_model: BertModel = BertModel.from_pretrained(self.bert_type_name)
    self.bert_model.eval()
Example #17
Source File: model.py From DFGN-pytorch with MIT License | 5 votes |
def __init__(self, top_rnns=False, vocab_size=None, device='cpu', finetuning=False):
    super().__init__()
    self.bert = BertModel.from_pretrained('bert-base-cased')

    self.top_rnns = top_rnns
    if top_rnns:
        self.rnn = nn.LSTM(bidirectional=True, num_layers=2, input_size=768,
                           hidden_size=768 // 2, batch_first=True)
    self.fc = nn.Linear(768, vocab_size)

    self.device = device
    self.finetuning = finetuning
Example #18
Source File: parse_nk.py From self-attentive-parser with MIT License | 5 votes |
def get_bert(bert_model, bert_do_lower_case):
    # Avoid a hard dependency on BERT by only importing it if it's being used
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    if bert_model.endswith('.tar.gz'):
        tokenizer = BertTokenizer.from_pretrained(bert_model.replace('.tar.gz', '-vocab.txt'),
                                                  do_lower_case=bert_do_lower_case)
    else:
        tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=bert_do_lower_case)
    bert = BertModel.from_pretrained(bert_model)
    return tokenizer, bert