Python pytorch_pretrained_bert.BertTokenizer.from_pretrained() Examples

The following are 30 code examples of pytorch_pretrained_bert.BertTokenizer.from_pretrained(), drawn from open-source projects; the originating project and source file are noted above each example. You may also want to check out the other available functions and classes of the pytorch_pretrained_bert.BertTokenizer module.
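Before the project-specific examples, here is a minimal standalone sketch of the typical call pattern (an illustration, assuming the pytorch_pretrained_bert package is installed and the 'bert-base-uncased' vocabulary can be downloaded): from_pretrained() accepts either a pre-trained model name or a path to a local vocabulary file/directory, and the resulting tokenizer converts text into WordPiece tokens and token ids.

from pytorch_pretrained_bert import BertTokenizer

# Load by model name (downloads and caches the vocabulary); a local vocab.txt
# path or directory can be passed instead of the name.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

tokens = ['[CLS]'] + tokenizer.tokenize("Hello, how are you?") + ['[SEP]']
token_ids = tokenizer.convert_tokens_to_ids(tokens)
print(tokens)
print(token_ids)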
Example #1
Source File: utils_unified.py    From interpret-text with MIT License
def _get_single_embedding(model, text, device):
    """Get the bert embedding for a single sentence
    :param text: The current sentence
    :type text: str
    :param device: A pytorch device
    :type device: torch.device
    :param model: a pytorch model
    :type model: torch.nn.Module
    :return: A BERT embedding of the single sentence, along with its tokens
    :rtype: torch.Tensor
    """
    tokenizer = BertTokenizer.from_pretrained(Language.ENGLISH)
    words = [BertTokens.CLS] + tokenizer.tokenize(text) + [BertTokens.SEP]
    tokenized_ids = tokenizer.convert_tokens_to_ids(words)
    token_tensor = torch.tensor([tokenized_ids], device=device)
    embedding = model.bert.embeddings(token_tensor)[0]
    return embedding, words 
Example #2
Source File: bert_dictionary.py    From ParlAI with MIT License
def __init__(self, opt):
        super().__init__(opt)
        # initialize from vocab path
        download(opt['datapath'])
        vocab_path = os.path.join(opt['datapath'], 'models', 'bert_models', VOCAB_PATH)
        self.tokenizer = BertTokenizer.from_pretrained(vocab_path)

        self.start_token = '[CLS]'
        self.end_token = '[SEP]'
        self.null_token = '[PAD]'
        self.start_idx = self.tokenizer.convert_tokens_to_ids(['[CLS]'])[
            0
        ]  # should be 101
        self.end_idx = self.tokenizer.convert_tokens_to_ids(['[SEP]'])[
            0
        ]  # should be 102
        self.pad_idx = self.tokenizer.convert_tokens_to_ids(['[PAD]'])[0]  # should be 0
        # set tok2ind for special tokens
        self.tok2ind[self.start_token] = self.start_idx
        self.tok2ind[self.end_token] = self.end_idx
        self.tok2ind[self.null_token] = self.pad_idx
        # set ind2tok for special tokens
        self.ind2tok[self.start_idx] = self.start_token
        self.ind2tok[self.end_idx] = self.end_token
        self.ind2tok[self.pad_idx] = self.null_token 
Example #3
Source File: bert_dictionary.py    From neural_chat with MIT License
def __init__(self, opt):
        super().__init__(opt)
        # initialize from vocab path
        download(opt['datapath'])
        vocab_path = os.path.join(opt['datapath'], 'models', 'bert_models', VOCAB_PATH)
        self.tokenizer = BertTokenizer.from_pretrained(vocab_path)

        self.start_token = '[CLS]'
        self.end_token = '[SEP]'
        self.null_token = '[PAD]'
        self.start_idx = self.tokenizer.convert_tokens_to_ids(['[CLS]'])[
            0
        ]  # should be 101
        self.end_idx = self.tokenizer.convert_tokens_to_ids(['[SEP]'])[
            0
        ]  # should be 102
        self.pad_idx = self.tokenizer.convert_tokens_to_ids(['[PAD]'])[0]  # should be 0
        # set tok2ind for special tokens
        self.tok2ind[self.start_token] = self.start_idx
        self.tok2ind[self.end_token] = self.end_idx
        self.tok2ind[self.null_token] = self.pad_idx
        # set ind2tok for special tokens
        self.ind2tok[self.start_idx] = self.start_token
        self.ind2tok[self.end_idx] = self.end_token
        self.ind2tok[self.pad_idx] = self.null_token 
Example #4
Source File: data_loader.py    From NER-BERT-pytorch with MIT License
def __init__(self, data_dir, bert_model_dir, params, token_pad_idx=0):
        self.data_dir = data_dir
        self.batch_size = params.batch_size
        self.max_len = params.max_len
        self.device = params.device
        self.seed = params.seed
        self.token_pad_idx = token_pad_idx

        tags = self.load_tags()
        self.tag2idx = {tag: idx for idx, tag in enumerate(tags)}
        self.idx2tag = {idx: tag for idx, tag in enumerate(tags)}
        params.tag2idx = self.tag2idx
        params.idx2tag = self.idx2tag
        self.tag_pad_idx = self.tag2idx['O']

        self.tokenizer = BertTokenizer.from_pretrained(bert_model_dir, do_lower_case=True) 
Example #5
Source File: qa_sampler.py    From semanticRetrievalMRS with MIT License
def inspect_sampler_squad_examples():
    bert_model_name = "bert-base-uncased"
    bert_pretrain_path = config.PRO_ROOT / '.pytorch_pretrained_bert'
    do_lower_case = True
    max_pre_context_length = 315
    max_query_length = 64
    doc_stride = 128
    debug = True

    tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case,
                                              cache_dir=bert_pretrain_path)

    squad_train_v2 = common.load_json(config.SQUAD_TRAIN_2_0)

    train_eitem_list = preprocessing_squad(squad_train_v2)
    train_fitem_dict, train_fitem_list = eitems_to_fitems(train_eitem_list, tokenizer, is_training=False,
                                                          max_tokens_for_doc=max_pre_context_length,
                                                          doc_stride=doc_stride,
                                                          debug=debug)
    print(len(train_fitem_list)) 
Example #6
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float, num_classes: int):
        super().__init__()
        self.initializer_range: float = initializer_range
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(self.tokenizer.vocab)
        self.num_layers = num_layers
        self.transformer = Transformer(embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.lm_head = torch.nn.Linear(embed_dim, num_embeddings, bias=False)
        self.classification_head = torch.nn.Linear(embed_dim, num_classes)

        self.apply(self.init_weights)
        self.tie_weights() 
Example #7
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, embed_dim: int, hidden_dim: int, num_embeddings: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float,
                 causal: bool):
        super().__init__()
        self.causal: bool = causal
        self.tokens_embeddings: torch.nn.Embedding = torch.nn.Embedding(num_embeddings, embed_dim)
        self.position_embeddings: torch.nn.Embedding = torch.nn.Embedding(num_max_positions, embed_dim)
        self.dropout: torch.nn.Dropout = torch.nn.Dropout(dropout)

        self.attentions, self.feed_forwards = torch.nn.ModuleList(), torch.nn.ModuleList()
        self.layer_norms_1, self.layer_norms_2 = torch.nn.ModuleList(), torch.nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(torch.nn.Sequential(torch.nn.Linear(embed_dim, hidden_dim),
                                                          torch.nn.ReLU(),
                                                          torch.nn.Linear(hidden_dim, embed_dim)))
            self.layer_norms_1.append(torch.nn.LayerNorm(embed_dim, eps=1e-12))
            self.layer_norms_2.append(torch.nn.LayerNorm(embed_dim, eps=1e-12))

        self.attn_mask = None
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False) 
Example #8
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float, num_classes: int):
        super().__init__()
        self.initializer_range: float = initializer_range
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(self.tokenizer.vocab)
        self.num_layers = num_layers
        self.transformer = Transformer(embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.lm_head = torch.nn.Linear(embed_dim, num_embeddings, bias=False)
        self.classification_head = torch.nn.Linear(embed_dim, num_classes)

        self.apply(self.init_weights)
        self.tie_weights() 
Example #9
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, embed_dim: int, hidden_dim: int, num_embeddings: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float,
                 causal: bool):
        super().__init__()
        self.causal: bool = causal
        self.tokens_embeddings: torch.nn.Embedding = torch.nn.Embedding(num_embeddings, embed_dim)
        self.position_embeddings: torch.nn.Embedding = torch.nn.Embedding(num_max_positions, embed_dim)
        self.dropout: torch.nn.Dropout = torch.nn.Dropout(dropout)

        self.attentions, self.feed_forwards = torch.nn.ModuleList(), torch.nn.ModuleList()
        self.layer_norms_1, self.layer_norms_2 = torch.nn.ModuleList(), torch.nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(torch.nn.Sequential(torch.nn.Linear(embed_dim, hidden_dim),
                                                          torch.nn.ReLU(),
                                                          torch.nn.Linear(hidden_dim, embed_dim)))
            self.layer_norms_1.append(torch.nn.LayerNorm(embed_dim, eps=1e-12))
            self.layer_norms_2.append(torch.nn.LayerNorm(embed_dim, eps=1e-12))

        self.attn_mask = None
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False) 
Example #10
Source File: input_embedding.py    From dstc8-meta-dialog with MIT License
def model(self):
    """lazy model loading"""
    with MODEL_DOWNLOAD_LOCK:
      # use lock to ensure model isn't downloaded by two processes at once
      if not getattr(self, "_model", None):
        self._model = BertModel.from_pretrained('bert-base-uncased')
        self._model.eval()
      assert self._model.config.hidden_size == self.embed_dim
    if cuda_utils.CUDA_ENABLED and self.use_cuda_if_available:
      self._model.cuda()
    return self._model 
Example #11
Source File: input_embedding.py    From dstc8-meta-dialog with MIT License
def tokenizer(self):
    """lazy model loading"""
    with MODEL_DOWNLOAD_LOCK:
      # use lock to ensure model isn't downloaded by two processes at once
      if not getattr(self, "_tokenizer", None):
        self._tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.pad_token = '[PAD]'
        self.bos_token = '[BOS]'
        self.eos_token = '[EOS]'
        self.unk_token = '[UNK]'
        self.pad_idx = self._tokenizer.vocab[self.pad_token]
        self.unk_idx = self._tokenizer.vocab[self.unk_token]

        # add EOS and BOS tokens to vocab by reusing unused slots
        self._tokenizer.basic_tokenizer.never_split += (self.eos_token, self.bos_token)
        vocab = self._tokenizer.vocab
        oldkey, newkey = '[unused1]', self.bos_token
        vocab = OrderedDict((newkey if k == oldkey else k, v) for k, v in vocab.items())
        oldkey, newkey = '[unused2]', self.eos_token
        vocab = OrderedDict((newkey if k == oldkey else k, v) for k, v in vocab.items())
        self._tokenizer.vocab = vocab
        self._tokenizer.wordpiece_tokenizer.vocab = vocab
        self.bos_idx = vocab[self.bos_token]
        self.eos_idx = vocab[self.eos_token]
        ids_to_tokens = OrderedDict(
            [(ids, tok) for tok, ids in vocab.items()])
        self._tokenizer.ids_to_tokens = ids_to_tokens
        self._tokenizer.wordpiece_tokenizer.ids_to_tokens = ids_to_tokens
    return self._tokenizer 
Example #12
Source File: data_builder.py    From BertSum with Apache License 2.0
def __init__(self, args):
        self.args = args
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
        self.sep_vid = self.tokenizer.vocab['[SEP]']
        self.cls_vid = self.tokenizer.vocab['[CLS]']
        self.pad_vid = self.tokenizer.vocab['[PAD]'] 
Example #13
Source File: bert_pretrained_encoder.py    From lale with Apache License 2.0
def __init__(self, batch_size = 32):
        # Load pre-trained model tokenizer (vocabulary)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.max_seq_length = self.tokenizer.max_len
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = BertModel.from_pretrained('bert-base-uncased')
        self.batch_size = batch_size

    # def fit(self, X, y):
    #     # TODO: Find the right value for max sequence length
    #     return BertPretrainedEncoderImpl() 
Example #14
Source File: BERT_Model.py    From bert-sense with MIT License
def __init__(self, device_number='cuda:2', use_cuda = True):
        
        self.device_number = device_number
        self.use_cuda = use_cuda
        
        self.tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        
        self.model = BertModel.from_pretrained('bert-large-uncased')
        self.model.eval()
        
        if use_cuda:
            self.model.to(device_number) 
Example #15
Source File: bert_toeic.py    From toeicbert with MIT License
def solve(row, bertmodel='bert-base-uncased'):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    tokenizer = BertTokenizer.from_pretrained(bertmodel)
    model = BertForMaskedLM.from_pretrained(bertmodel).to(device)
    model.eval()

    question = re.sub(r'_+', ' [MASK] ', to_clean(row['question']))
    question_tokens = tokenizer.tokenize(question)
    masked_index = question_tokens.index('[MASK]')

    # make segment which is divided with sentence A or B, but we set all '0' as sentence A
    segment_ids = [0] * len(question_tokens)
    segment_tensors = torch.tensor([segment_ids]).to(device)

    # question tokens convert to ids and tensors
    question_ids = tokenizer.convert_tokens_to_ids(question_tokens)
    question_tensors = torch.tensor([question_ids]).to(device)

    candidates = [to_clean(row['1']), to_clean(row['2']), to_clean(row['3']), to_clean(row['4'])]
    predict_tensor = torch.tensor([get_score(model, tokenizer, question_tensors, segment_tensors,
                                             masked_index, candidate) for candidate in candidates])
    predict_idx = torch.argmax(predict_tensor).item()

    if 'answer' in row:
        show(row['question'], candidates, predict_idx, row['answer'])
    else:
        show(row['question'], candidates, predict_idx, None) 
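Example #15 calls a get_score() helper that is not shown in the snippet. Below is a hypothetical sketch of such a scorer (an assumption for illustration, not the project's actual implementation): it converts the candidate into WordPiece ids, runs BertForMaskedLM once, and sums the log-probabilities assigned to those ids at the masked position.

import torch

def score_candidate(model, tokenizer, question_tensors, segment_tensors, masked_index, candidate):
    # Hypothetical scorer (assumption): sum the masked-LM log-probabilities that
    # BERT assigns to the candidate's WordPiece ids at the masked position.
    candidate_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(candidate))
    with torch.no_grad():
        predictions = model(question_tensors, segment_tensors)  # [1, seq_len, vocab_size]
    log_probs = torch.log_softmax(predictions[0, masked_index], dim=-1)
    return sum(log_probs[idx].item() for idx in candidate_ids)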
Example #16
Source File: qa_sampler.py    From semanticRetrievalMRS with MIT License
def inspect_upstream_eval_v1():
    bert_model_name = "bert-base-uncased"
    bert_pretrain_path = config.PRO_ROOT / '.pytorch_pretrained_bert'
    do_lower_case = True

    max_pre_context_length = 315
    max_query_length = 64
    doc_stride = 128

    is_training = True
    debug_mode = True

    d_list = common.load_jsonl(config.OPEN_SQUAD_DEV_GT)
    in_file_name = config.PRO_ROOT / 'saved_models/05-12-08:44:38_mtr_open_qa_p_level_(num_train_epochs:3)/i(2000)|e(2)|squad|top10(0.6909176915799432)|top20(0.7103122043519394)|seed(12)_eval_results.jsonl'
    cur_eval_results_list = common.load_jsonl(in_file_name)
    top_k = 10
    filter_value = 0.1
    match_type = 'string'
    tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case,
                                              cache_dir=bert_pretrain_path)

    fitems_dict, read_fitems_list, _ = get_open_qa_item_with_upstream_paragraphs(d_list, cur_eval_results_list, is_training,
                                                                                 tokenizer, max_pre_context_length, max_query_length, doc_stride,
                                                                                 debug_mode, top_k, filter_value, match_type)
    print(len(read_fitems_list))
    print(len(fitems_dict)) 
Example #17
Source File: trainer.py    From mrqa with Apache License 2.0
def __init__(self, args):
        self.args = args
        self.set_random_seed(random_seed=args.random_seed)

        self.tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                                       do_lower_case=args.do_lower_case)
        if args.debug:
            print("Debugging mode on.")
        self.features_lst = self.get_features(self.args.train_folder, self.args.debug) 
Example #18
Source File: bert_model_runtime.py    From botbuilder-python with MIT License
def _load_model(self) -> None:
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() and not self.no_cuda else "cpu"
        )
        self.n_gpu = torch.cuda.device_count()

        # Load a trained model and vocabulary that you have fine-tuned
        self.model = BertForSequenceClassification.from_pretrained(
            self.model_dir, num_labels=self.num_labels
        )
        self.tokenizer = BertTokenizer.from_pretrained(
            self.model_dir, do_lower_case=self.do_lower_case
        )
        self.model.to(self.device) 
Example #19
Source File: prebert.py    From ner_with_dependency with GNU General Public License v3.0
def load_bert():
    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    model = BertModel.from_pretrained('bert-base-cased')
    model.eval()
    model.to(device)
    return tokenizer, model 
Example #20
Source File: dataHelper.py    From TextClassificationBenchmark with MIT License
def load_vocab_from_bert(bert_base):
    
    
    bert_vocab_dir = os.path.join(bert_base,"vocab.txt")
    alphabet = Alphabet(start_feature_id = 0,alphabet_type="bert")

    from pytorch_pretrained_bert import BertTokenizer

    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained(bert_vocab_dir)
    for index,word in tokenizer.ids_to_tokens.items():
        alphabet.add(word)
    return alphabet,tokenizer 
Example #21
Source File: bert_servant.py    From combine-FEVER-NSMN with MIT License
def __init__(self, bert_type_name='') -> None:
        super().__init__()
        self.bert_type_name = bert_type_name

        self.bert_tokenizer = BertTokenizer.from_pretrained(self.bert_type_name)

        self.bert_model: BertModel = BertModel.from_pretrained(self.bert_type_name)
        self.bert_model.eval() 
Example #22
Source File: bert_tokenizer.py    From sciwing with MIT License
def __init__(self, bert_type: str, do_basic_tokenize=True):
        super(TokenizerForBert, self).__init__()
        self.bert_type = bert_type
        self.do_basic_tokenize = do_basic_tokenize
        self.msg_printer = wasabi.Printer()
        self.allowed_bert_types = [
            "bert-base-uncased",
            "bert-large-uncased",
            "bert-base-cased",
            "bert-large-cased",
            "scibert-base-cased",
            "scibert-sci-cased",
            "scibert-base-uncased",
            "scibert-sci-uncased",
        ]
        self.scibert_foldername_mapping = {
            "scibert-base-cased": "scibert_basevocab_cased",
            "scibert-sci-cased": "scibert_scivocab_cased",
            "scibert-base-uncased": "scibert_basevocab_uncased",
            "scibert-sci-uncased": "scibert_scivocab_uncased",
        }
        assert bert_type in self.allowed_bert_types, self.msg_printer.fail(
            f"You passed {bert_type} for attribute bert_type."
            f"The allowed types are {self.allowed_bert_types}"
        )
        self.vocab_type_or_filename = None
        if "scibert" in self.bert_type:
            foldername = self.scibert_foldername_mapping[self.bert_type]
            self.vocab_type_or_filename = os.path.join(
                EMBEDDING_CACHE_DIR, foldername, "vocab.txt"
            )
        else:
            self.vocab_type_or_filename = self.bert_type

        with self.msg_printer.loading("Loading Bert model"):
            self.tokenizer = BertTokenizer.from_pretrained(
                self.vocab_type_or_filename, do_basic_tokenize=do_basic_tokenize
            ) 
Example #23
Source File: parse_nk.py    From self-attentive-parser with MIT License
def get_bert(bert_model, bert_do_lower_case):
    # Avoid a hard dependency on BERT by only importing it if it's being used
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    if bert_model.endswith('.tar.gz'):
        tokenizer = BertTokenizer.from_pretrained(bert_model.replace('.tar.gz', '-vocab.txt'), do_lower_case=bert_do_lower_case)
    else:
        tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=bert_do_lower_case)
    bert = BertModel.from_pretrained(bert_model)
    return tokenizer, bert

# %% 
Example #24
Source File: bert.py    From transfer-nlp with MIT License
def bert_model(pretrained_model_name_or_path: str = 'bert-base-uncased', num_labels: int = 4):
    return BertForSequenceClassification.from_pretrained(pretrained_model_name_or_path=pretrained_model_name_or_path, num_labels=num_labels) 
Example #25
Source File: bert.py    From transfer-nlp with MIT License
def __init__(self, data_file: str, bert_version: str):
        super().__init__(data_file=data_file)
        self.tokenizer = BertTokenizer.from_pretrained(bert_version)
        df = pd.read_csv(data_file)
        self.target_vocab = Vocabulary(add_unk=False)
        self.target_vocab.add_many(set(df.category)) 
Example #26
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, adapters_dim: int,
                 embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float, num_classes: int):
        """ Transformer with a classification head and adapters. """
        super().__init__()
        self.initializer_range: float = initializer_range
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(self.tokenizer.vocab)
        self.num_layers = num_layers
        self.transformer: TransformerWithAdapters = TransformerWithAdapters(adapters_dim, embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.classification_head = torch.nn.Linear(embed_dim, num_classes)
        self.apply(self.init_weights) 
Example #27
Source File: model.py    From transfer-nlp with MIT License
def __init__(self,
                 embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float, num_classes: int):
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(self.tokenizer.vocab)
        self.initializer_range = initializer_range
        self.transformer = Transformer(embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.classification_head = torch.nn.Linear(embed_dim, num_classes)

        self.apply(self.init_weights) 
Example #28
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float):
        """ Transformer with a language modeling head on top (tied weights) """
        super().__init__()
        tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(tokenizer.vocab)
        self.initializer_range = initializer_range
        self.transformer = Transformer(embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.lm_head = torch.nn.Linear(embed_dim, num_embeddings, bias=False)
        self.apply(self.init_weights)
        self.tie_weights() 
Example #29
Source File: model.py    From transfer-nlp with MIT License
def __init__(self, adapters_dim: int,
                 embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float, num_classes: int):
        """ Transformer with a classification head and adapters. """
        super().__init__()
        self.initializer_range: float = initializer_range
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(self.tokenizer.vocab)
        self.num_layers = num_layers
        self.transformer: TransformerWithAdapters = TransformerWithAdapters(adapters_dim, embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.classification_head = torch.nn.Linear(embed_dim, num_classes)
        self.apply(self.init_weights) 
Example #30
Source File: model.py    From transfer-nlp with MIT License
def __init__(self,
                 embed_dim: int, hidden_dim: int, num_max_positions: int, num_heads: int, num_layers: int, dropout: float, causal: bool,
                 initializer_range: float, num_classes: int):
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        num_embeddings = len(self.tokenizer.vocab)
        self.initializer_range = initializer_range
        self.transformer = Transformer(embed_dim, hidden_dim, num_embeddings,
                                       num_max_positions, num_heads, num_layers,
                                       dropout, causal=causal)

        self.classification_head = torch.nn.Linear(embed_dim, num_classes)

        self.apply(self.init_weights)