Python torch.nn.TransformerEncoderLayer() Examples
The following are 11 code examples of torch.nn.TransformerEncoderLayer(), collected from open-source projects. The original project and source file for each example are noted above it. You may also want to check out the other available functions and classes of the torch.nn module.
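Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of how torch.nn.TransformerEncoderLayer is typically constructed and stacked into a full encoder with torch.nn.TransformerEncoder; the shapes assume the default sequence-first layout (seq_len, batch, d_model).

import torch
import torch.nn as nn

# a single self-attention + feed-forward block; d_model must be divisible by nhead
layer = nn.TransformerEncoderLayer(d_model=512, nhead=8, dim_feedforward=2048, dropout=0.1)

# stack six copies of the layer into a full encoder
encoder = nn.TransformerEncoder(layer, num_layers=6)

src = torch.rand(10, 32, 512)   # (seq_len, batch, d_model)
out = encoder(src)              # same shape as src: (10, 32, 512)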
Example #1
Source File: model.py From examples with BSD 3-Clause "New" or "Revised" License
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
    super(TransformerModel, self).__init__()
    try:
        from torch.nn import TransformerEncoder, TransformerEncoderLayer
    except ImportError:
        raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = ninp
    self.decoder = nn.Linear(ninp, ntoken)
    self.init_weights()
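The constructor above only wires up the modules; a hedged sketch of the matching mask construction and forward pass, modeled on the upstream PyTorch word-language-model example (treat it as a reconstruction, not this file's exact code), looks like this:

def _generate_square_subsequent_mask(self, sz):
    # causal mask: -inf above the diagonal so position i only attends to positions <= i
    mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1).float()
    mask = mask.masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask

def forward(self, src):
    if self.src_mask is None or self.src_mask.size(0) != len(src):
        self.src_mask = self._generate_square_subsequent_mask(len(src)).to(src.device)
    src = self.encoder(src) * math.sqrt(self.ninp)   # assumes `import math` at module level
    src = self.pos_encoder(src)
    output = self.transformer_encoder(src, self.src_mask)
    return self.decoder(output)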
Example #2
Source File: seq2seq_transformer.py From MultiTurnDialogZoo with MIT License
def __init__(self, input_vocab_size, opt_vocab_size, d_model, nhead,
             num_encoder_layers, dim_feedforward, position_embed_size=300,
             utter_n_layer=2, dropout=0.3, sos=0, pad=0, teach_force=1):
    super(Transformer, self).__init__()
    self.d_model = d_model
    self.hidden_size = d_model
    self.embed_src = nn.Embedding(input_vocab_size, d_model)
    # position maxlen is 5000
    self.pos_enc = PositionEmbedding(d_model, dropout=dropout,
                                     max_len=position_embed_size)
    self.input_vocab_size = input_vocab_size
    self.utter_n_layer = utter_n_layer
    self.opt_vocab_size = opt_vocab_size
    self.pad, self.sos = pad, sos
    self.teach_force = teach_force
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
                                               dim_feedforward=dim_feedforward,
                                               dropout=dropout, activation='gelu')
    self.encoder = nn.TransformerEncoder(encoder_layer,
                                         num_layers=num_encoder_layers)
    self.decoder = Decoder(d_model, d_model, opt_vocab_size,
                           n_layers=utter_n_layer, dropout=dropout, nhead=nhead)
Example #3
Source File: pytorch_U2GNN_UnSup.py From Graph-Transformer with Apache License 2.0
def __init__(self, vocab_size, feature_dim_size, ff_hidden_size, sampled_num,
             num_self_att_layers, num_U2GNN_layers, dropout, device):
    super(TransformerU2GNN, self).__init__()
    self.feature_dim_size = feature_dim_size
    self.ff_hidden_size = ff_hidden_size
    self.num_self_att_layers = num_self_att_layers  # each U2GNN layer consists of a number of self-attention layers
    self.num_U2GNN_layers = num_U2GNN_layers
    self.vocab_size = vocab_size
    self.sampled_num = sampled_num
    self.device = device
    #
    self.u2gnn_layers = torch.nn.ModuleList()
    for _ in range(self.num_U2GNN_layers):
        encoder_layers = TransformerEncoderLayer(d_model=self.feature_dim_size, nhead=1,
                                                 dim_feedforward=self.ff_hidden_size,
                                                 dropout=0.5)  # embed_dim must be divisible by num_heads
        self.u2gnn_layers.append(TransformerEncoder(encoder_layers, self.num_self_att_layers))
    # Linear function
    self.dropouts = nn.Dropout(dropout)
    self.ss = SampledSoftmax(self.vocab_size, self.sampled_num,
                             self.feature_dim_size * self.num_U2GNN_layers, self.device)
Example #4
Source File: pytorch_U2GNN_Sup.py From Graph-Transformer with Apache License 2.0
def __init__(self, feature_dim_size, ff_hidden_size, num_classes,
             num_self_att_layers, dropout, num_U2GNN_layers):
    super(TransformerU2GNN, self).__init__()
    self.feature_dim_size = feature_dim_size
    self.ff_hidden_size = ff_hidden_size
    self.num_classes = num_classes
    self.num_self_att_layers = num_self_att_layers  # each U2GNN layer consists of a number of self-attention layers
    self.num_U2GNN_layers = num_U2GNN_layers
    #
    self.u2gnn_layers = torch.nn.ModuleList()
    for _ in range(self.num_U2GNN_layers):
        encoder_layers = TransformerEncoderLayer(d_model=self.feature_dim_size, nhead=1,
                                                 dim_feedforward=self.ff_hidden_size, dropout=0.5)
        self.u2gnn_layers.append(TransformerEncoder(encoder_layers, self.num_self_att_layers))
    # Linear function
    self.predictions = torch.nn.ModuleList()
    self.dropouts = torch.nn.ModuleList()
    # self.predictions.append(nn.Linear(feature_dim_size, num_classes))
    # For including feature vectors to predict graph labels???
    for _ in range(self.num_U2GNN_layers):
        self.predictions.append(nn.Linear(self.feature_dim_size, self.num_classes))
        self.dropouts.append(nn.Dropout(dropout))
Example #5
Source File: model.py From PyTorch with MIT License
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
    super(TransformerModel, self).__init__()
    try:
        from torch.nn import TransformerEncoder, TransformerEncoderLayer
    except ImportError:
        raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = ninp
    self.decoder = nn.Linear(ninp, ntoken)
    self.init_weights()
Example #6
Source File: model.py From CoupletAI with MIT License
def __init__(self, vocab_size: int, embed_dim: int, hidden_dim: int):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embed_dim)
    self.hidden2tag = nn.Linear(hidden_dim, vocab_size)
    self.mapper = nn.Linear(embed_dim, hidden_dim)
    layer = nn.TransformerEncoderLayer(hidden_dim, 4, dim_feedforward=512)
    self.encoder = nn.TransformerEncoder(layer, 4)
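Only the constructor is shown here; a plausible forward pass for this kind of sequence tagger (hypothetical, not taken from the CoupletAI source) would embed, project to the encoder width, run the 4-layer encoder, and map back to vocabulary logits, keeping in mind that nn.TransformerEncoder expects (seq_len, batch, hidden_dim) input:

def forward(self, tokens):
    # tokens: (batch, seq_len) LongTensor of vocabulary indices
    x = self.embedding(tokens)          # (batch, seq_len, embed_dim)
    x = self.mapper(x)                  # (batch, seq_len, hidden_dim)
    x = x.transpose(0, 1)               # (seq_len, batch, hidden_dim) for the encoder
    x = self.encoder(x)
    x = x.transpose(0, 1)               # back to (batch, seq_len, hidden_dim)
    return self.hidden2tag(x)           # (batch, seq_len, vocab_size) logits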
Example #7
Source File: transformer.py From pykaldi2 with MIT License
def __init__(self, dim_model, nheads, dim_feedforward, dropout, kernel_size, stride):
    super(TransformerEncoderLayerWithConv1d, self).__init__()
    self.encoder_layer = nn.TransformerEncoderLayer(dim_model, nheads,
                                                    dim_feedforward, dropout)
    self.conv1d = nn.Conv1d(dim_model, dim_model, kernel_size,
                            stride=stride, padding=1)
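The interesting detail in this wrapper is the shape juggling between the two sub-modules: nn.TransformerEncoderLayer consumes (seq_len, batch, dim_model) while nn.Conv1d expects (batch, channels, length). A hedged sketch of a forward method that would make the constructor above usable (not the project's exact code):

def forward(self, src, src_mask=None, src_key_padding_mask=None):
    # src: (seq_len, batch, dim_model)
    out = self.encoder_layer(src, src_mask=src_mask,
                             src_key_padding_mask=src_key_padding_mask)
    out = out.permute(1, 2, 0)          # -> (batch, dim_model, seq_len) for Conv1d
    out = self.conv1d(out)              # subsamples along the time axis via stride
    out = out.permute(2, 0, 1)          # -> (new_seq_len, batch, dim_model)
    return out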
Example #8
Source File: tk_native.py From transformer-kernel-ranking with Apache License 2.0
def __init__(self, _embsize: int, kernels_mu: List[float], kernels_sigma: List[float],
             att_heads: int, att_layer: int, att_proj_dim: int, att_ff_dim: int):
    super(TK_Native_v1, self).__init__()

    n_kernels = len(kernels_mu)
    if len(kernels_mu) != len(kernels_sigma):
        raise Exception("len(kernels_mu) != len(kernels_sigma)")

    # static - kernel size & magnitude variables
    self.mu = Variable(torch.cuda.FloatTensor(kernels_mu), requires_grad=False).view(1, 1, 1, n_kernels)
    self.sigma = Variable(torch.cuda.FloatTensor(kernels_sigma), requires_grad=False).view(1, 1, 1, n_kernels)
    self.nn_scaler = nn.Parameter(torch.full([1], 0.01, dtype=torch.float32, requires_grad=True))
    self.mixer = nn.Parameter(torch.full([1, 1, 1], 0.5, dtype=torch.float32, requires_grad=True))

    encoder_layer = nn.TransformerEncoderLayer(_embsize, att_heads,
                                               dim_feedforward=att_ff_dim, dropout=0)
    self.contextualizer = nn.TransformerEncoder(encoder_layer, att_layer, norm=None)

    # this does not really do "attention" - just a plain cosine matrix calculation (without learnable weights)
    self.cosine_module = CosineMatrixAttention()

    # bias is set to True in original code (we found it to not help, how could it?)
    self.dense = nn.Linear(n_kernels, 1, bias=False)
    self.dense_mean = nn.Linear(n_kernels, 1, bias=False)
    self.dense_comb = nn.Linear(2, 1, bias=False)

    # init with small weights, otherwise the dense output is way too high for the tanh -> resulting in loss == 1 all the time
    torch.nn.init.uniform_(self.dense.weight, -0.014, 0.014)       # inits taken from matchzoo
    torch.nn.init.uniform_(self.dense_mean.weight, -0.014, 0.014)  # inits taken from matchzoo
    # self.dense.bias.data.fill_(0.0)
Example #9
Source File: absa_layer.py From BERT-E2E-ABSA with Apache License 2.0
def __init__(self, bert_config):
    """
    :param bert_config: configuration for bert model
    """
    super(BertABSATagger, self).__init__(bert_config)
    self.num_labels = bert_config.num_labels
    self.tagger_config = TaggerConfig()
    self.tagger_config.absa_type = bert_config.absa_type.lower()
    if bert_config.tfm_mode == 'finetune':
        # initialized with pre-trained BERT and perform finetuning
        # print("Fine-tuning the pre-trained BERT...")
        self.bert = BertModel(bert_config)
    else:
        raise Exception("Invalid transformer mode %s!!!" % bert_config.tfm_mode)
    self.bert_dropout = nn.Dropout(bert_config.hidden_dropout_prob)
    # fix the parameters in BERT and regard it as feature extractor
    if bert_config.fix_tfm:
        # fix the parameters of the (pre-trained or randomly initialized) transformers during fine-tuning
        for p in self.bert.parameters():
            p.requires_grad = False

    self.tagger = None
    if self.tagger_config.absa_type == 'linear':
        # hidden size at the penultimate layer
        penultimate_hidden_size = bert_config.hidden_size
    else:
        self.tagger_dropout = nn.Dropout(self.tagger_config.hidden_dropout_prob)
        if self.tagger_config.absa_type == 'lstm':
            self.tagger = LSTM(input_size=bert_config.hidden_size,
                               hidden_size=self.tagger_config.hidden_size,
                               bidirectional=self.tagger_config.bidirectional)
        elif self.tagger_config.absa_type == 'gru':
            self.tagger = GRU(input_size=bert_config.hidden_size,
                              hidden_size=self.tagger_config.hidden_size,
                              bidirectional=self.tagger_config.bidirectional)
        elif self.tagger_config.absa_type == 'tfm':
            # transformer encoder layer
            self.tagger = nn.TransformerEncoderLayer(d_model=bert_config.hidden_size,
                                                     nhead=12,
                                                     dim_feedforward=4 * bert_config.hidden_size,
                                                     dropout=0.1)
        elif self.tagger_config.absa_type == 'san':
            # vanilla self attention networks
            self.tagger = SAN(d_model=bert_config.hidden_size, nhead=12, dropout=0.1)
        elif self.tagger_config.absa_type == 'crf':
            self.tagger = CRF(num_tags=self.num_labels)
        else:
            raise Exception('Unimplemented downstream tagger %s...' % self.tagger_config.absa_type)
        penultimate_hidden_size = self.tagger_config.hidden_size
    self.classifier = nn.Linear(penultimate_hidden_size, bert_config.num_labels)
Example #10
Source File: pytorch_transformer_wrapper.py From allennlp with Apache License 2.0
def __init__(
    self,
    input_dim: int,
    num_layers: int,
    feedforward_hidden_dim: int = 2048,
    num_attention_heads: int = 8,
    positional_encoding: Optional[str] = None,
    positional_embedding_size: int = 512,
    dropout_prob: float = 0.1,
    activation: str = "relu",
) -> None:
    super().__init__()

    layer = nn.TransformerEncoderLayer(
        d_model=input_dim,
        nhead=num_attention_heads,
        dim_feedforward=feedforward_hidden_dim,
        dropout=dropout_prob,
        activation=activation,
    )
    self._transformer = nn.TransformerEncoder(layer, num_layers)
    self._input_dim = input_dim

    # initialize parameters
    # We do this before the embeddings are initialized so we get the default initialization for the embeddings.
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    if positional_encoding is None:
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = None
    elif positional_encoding == "sinusoidal":
        self._sinusoidal_positional_encoding = True
        self._positional_embedding = None
    elif positional_encoding == "embedding":
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = nn.Embedding(positional_embedding_size, input_dim)
    else:
        raise ValueError(
            "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
        )
Example #11
Source File: torch_transformer_encoder.py From summarus with Apache License 2.0
def __init__(
    self,
    input_dim: int,
    num_layers: int,
    feedforward_hidden_dim: int = 2048,
    num_attention_heads: int = 8,
    positional_encoding: Optional[str] = None,
    positional_embedding_size: int = 512,
    dropout_prob: float = 0.1,
    activation: str = "relu",
) -> None:
    super().__init__()

    layer = nn.TransformerEncoderLayer(
        d_model=input_dim,
        nhead=num_attention_heads,
        dim_feedforward=feedforward_hidden_dim,
        dropout=dropout_prob,
        activation=activation,
    )
    self._transformer = nn.TransformerEncoder(layer, num_layers)
    self._input_dim = input_dim

    # initialize parameters
    # We do this before the embeddings are initialized so we get the default initialization for the embeddings.
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    if positional_encoding is None:
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = None
    elif positional_encoding == "sinusoidal":
        self._sinusoidal_positional_encoding = True
        self._positional_embedding = None
    elif positional_encoding == "embedding":
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = nn.Embedding(positional_embedding_size, input_dim)
    else:
        raise ValueError(
            "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
        )
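Examples #10 and #11 are essentially the same AllenNLP-style wrapper. Its public interface is batch-first, whereas nn.TransformerEncoder is sequence-first by default, so the forward method has to transpose on the way in and out. The following is a hedged reconstruction, not the project's exact code: add_positional_features stands in for a sinusoidal-encoding helper and is an assumption, as is the exact mask handling.

def forward(self, inputs, mask):
    # inputs: (batch, seq_len, input_dim); mask: (batch, seq_len) bool, True for real tokens
    output = inputs
    if self._sinusoidal_positional_encoding:
        output = add_positional_features(output)   # hypothetical helper for sinusoidal encodings
    if self._positional_embedding is not None:
        position_ids = torch.arange(inputs.size(1), device=inputs.device)
        output = output + self._positional_embedding(position_ids).unsqueeze(0)
    output = output.permute(1, 0, 2)               # -> (seq_len, batch, input_dim)
    # src_key_padding_mask marks positions to ignore, so invert the "keep" mask
    output = self._transformer(output, src_key_padding_mask=~mask)
    return output.permute(1, 0, 2)                 # back to (batch, seq_len, input_dim)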