Python torch.nn.TransformerEncoder() Examples
The following are 11 code examples of torch.nn.TransformerEncoder(). The project and source file each example was taken from are noted above its code, so you can follow up in the original repository. You may also want to check out the other available functions and classes of the torch.nn module.
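Before the project examples, here is a minimal, self-contained sketch of the API they all build on; the sizes below are arbitrary illustration values, not taken from any of the examples.

import torch
import torch.nn as nn

# Stack six identical encoder layers into one module.
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)

src = torch.rand(10, 32, 512)   # (seq_len, batch, d_model) -- the default layout
out = encoder(src)              # output has the same shape as the input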
Example #1
Source File: model.py, from examples (BSD 3-Clause "New" or "Revised" License)
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
    super(TransformerModel, self).__init__()
    try:
        from torch.nn import TransformerEncoder, TransformerEncoderLayer
    except:
        raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = ninp
    self.decoder = nn.Linear(ninp, ntoken)
    self.init_weights()
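A constructor like this is usually paired with a causal-mask helper and a forward pass along the following lines. This is a hedged sketch of the common pattern, not code quoted from the source file above.

# Sketch only -- assumed companion methods, not quoted from model.py.
def _generate_square_subsequent_mask(self, sz):
    # Upper-triangular mask with -inf above the diagonal, so each position
    # can only attend to itself and earlier positions.
    return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

def forward(self, src):
    if self.src_mask is None or self.src_mask.size(0) != src.size(0):
        self.src_mask = self._generate_square_subsequent_mask(src.size(0)).to(src.device)
    src = self.encoder(src) * math.sqrt(self.ninp)   # embed and scale (assumes `import math`)
    src = self.pos_encoder(src)                      # add positional encoding
    output = self.transformer_encoder(src, self.src_mask)
    return self.decoder(output)                      # per-token vocabulary logits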
Example #2
Source File: py_Transformer.py, from NLP_Toolkit (Apache License 2.0)
def __init__(self, src_vocab, trg_vocab, trg_vocab2, d_model, ff_dim, num, n_heads,
             max_encoder_len, max_decoder_len, mappings, idx_mappings):
    super(pyTransformer, self).__init__()
    self.src_vocab = src_vocab
    self.trg_vocab = trg_vocab
    self.trg_vocab2 = trg_vocab2
    self.d_model = d_model
    self.ff_dim = ff_dim
    self.num = num
    self.n_heads = n_heads
    self.max_encoder_len = max_encoder_len
    self.max_decoder_len = max_decoder_len
    self.mappings = mappings
    self.idx_mappings = idx_mappings
    self.embed1 = nn.Embedding(src_vocab, d_model)
    self.embed2 = nn.Embedding(trg_vocab, d_model)
    #self.transformer = nn.Transformer(d_model=d_model, nhead=n_heads, num_encoder_layers=num,
    #                                  num_decoder_layers=num, dim_feedforward=ff_dim, dropout=0.1)
    self.encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, n_heads, ff_dim, dropout=0.1),
                                         num_layers=num, norm=nn.LayerNorm(normalized_shape=d_model, eps=1e-6))
    self.decoder = nn.TransformerDecoder(nn.TransformerDecoderLayer(d_model, n_heads, ff_dim, dropout=0.1),
                                         num_layers=num, norm=nn.LayerNorm(normalized_shape=d_model, eps=1e-6))
    self.fc1 = nn.Linear(d_model, trg_vocab)
    self.fc2 = nn.Linear(d_model, trg_vocab2)
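As a rough illustration of how this encoder/decoder pair is wired together at call time, consider the hypothetical sketch below; `src_ids` and `trg_ids` are assumed names, not identifiers from the source file.

# Hypothetical usage sketch -- not quoted from py_Transformer.py.
# nn.TransformerEncoder/Decoder expect (seq_len, batch, d_model) by default,
# hence the transposes from (batch, seq_len) token-id tensors.
src = self.embed1(src_ids).transpose(0, 1)      # (S, N, d_model)
trg = self.embed2(trg_ids).transpose(0, 1)      # (T, N, d_model)
memory = self.encoder(src)                      # encoded source states
out = self.decoder(trg, memory)                 # decoded target states
logits = self.fc1(out.transpose(0, 1))          # (N, T, trg_vocab)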
Example #3
Source File: seq2seq_transformer.py, from MultiTurnDialogZoo (MIT License)
def __init__(self, input_vocab_size, opt_vocab_size, d_model, nhead,
             num_encoder_layers, dim_feedforward, position_embed_size=300,
             utter_n_layer=2, dropout=0.3, sos=0, pad=0, teach_force=1):
    super(Transformer, self).__init__()
    self.d_model = d_model
    self.hidden_size = d_model
    self.embed_src = nn.Embedding(input_vocab_size, d_model)
    # position maxlen is 5000
    self.pos_enc = PositionEmbedding(d_model, dropout=dropout, max_len=position_embed_size)
    self.input_vocab_size = input_vocab_size
    self.utter_n_layer = utter_n_layer
    self.opt_vocab_size = opt_vocab_size
    self.pad, self.sos = pad, sos
    self.teach_force = teach_force
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
                                               dim_feedforward=dim_feedforward,
                                               dropout=dropout, activation='gelu')
    self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
    self.decoder = Decoder(d_model, d_model, opt_vocab_size,
                           n_layers=utter_n_layer, dropout=dropout, nhead=nhead)
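Since the constructor stores a pad index, a key padding mask is typically built and handed to the encoder roughly as follows. This is a hypothetical sketch; `src_ids` is an assumed name.

# Hypothetical sketch -- not quoted from seq2seq_transformer.py.
# src_ids is assumed to be (seq_len, batch); src_key_padding_mask must be
# (batch, seq_len), with True marking positions the encoder should ignore.
src_key_padding_mask = (src_ids == self.pad).t()
src = self.pos_enc(self.embed_src(src_ids))              # (S, N, d_model)
memory = self.encoder(src, src_key_padding_mask=src_key_padding_mask)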
Example #4
Source File: pytorch_U2GNN_UnSup.py, from Graph-Transformer (Apache License 2.0)
def __init__(self, vocab_size, feature_dim_size, ff_hidden_size, sampled_num,
             num_self_att_layers, num_U2GNN_layers, dropout, device):
    super(TransformerU2GNN, self).__init__()
    self.feature_dim_size = feature_dim_size
    self.ff_hidden_size = ff_hidden_size
    self.num_self_att_layers = num_self_att_layers  # Each U2GNN layer consists of a number of self-attention layers
    self.num_U2GNN_layers = num_U2GNN_layers
    self.vocab_size = vocab_size
    self.sampled_num = sampled_num
    self.device = device
    #
    self.u2gnn_layers = torch.nn.ModuleList()
    for _ in range(self.num_U2GNN_layers):
        encoder_layers = TransformerEncoderLayer(d_model=self.feature_dim_size, nhead=1,
                                                 dim_feedforward=self.ff_hidden_size,
                                                 dropout=0.5)  # embed_dim must be divisible by num_heads
        self.u2gnn_layers.append(TransformerEncoder(encoder_layers, self.num_self_att_layers))
    # Linear function
    self.dropouts = nn.Dropout(dropout)
    self.ss = SampledSoftmax(self.vocab_size, self.sampled_num,
                             self.feature_dim_size * self.num_U2GNN_layers, self.device)
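Because each U2GNN layer is a full TransformerEncoder stored in a ModuleList, the forward pass essentially iterates over them and keeps one representation per layer. The following is a hedged sketch of that pattern only, not the project's actual forward (which also handles neighbour sampling); `input_features` is an assumed name.

# Hypothetical sketch -- not quoted from pytorch_U2GNN_UnSup.py.
# input_features is assumed to be (seq_len, batch, feature_dim_size).
layer_outputs = []
x = input_features
for u2gnn_layer in self.u2gnn_layers:      # each entry is a TransformerEncoder
    x = u2gnn_layer(x)
    layer_outputs.append(x)
# Concatenating the per-layer outputs matches the
# feature_dim_size * num_U2GNN_layers input size declared for SampledSoftmax above.
node_embeddings = torch.cat(layer_outputs, dim=-1)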
Example #5
Source File: pytorch_U2GNN_Sup.py, from Graph-Transformer (Apache License 2.0)
def __init__(self, feature_dim_size, ff_hidden_size, num_classes,
             num_self_att_layers, dropout, num_U2GNN_layers):
    super(TransformerU2GNN, self).__init__()
    self.feature_dim_size = feature_dim_size
    self.ff_hidden_size = ff_hidden_size
    self.num_classes = num_classes
    self.num_self_att_layers = num_self_att_layers  # Each U2GNN layer consists of a number of self-attention layers
    self.num_U2GNN_layers = num_U2GNN_layers
    #
    self.u2gnn_layers = torch.nn.ModuleList()
    for _ in range(self.num_U2GNN_layers):
        encoder_layers = TransformerEncoderLayer(d_model=self.feature_dim_size, nhead=1,
                                                 dim_feedforward=self.ff_hidden_size, dropout=0.5)
        self.u2gnn_layers.append(TransformerEncoder(encoder_layers, self.num_self_att_layers))
    # Linear function
    self.predictions = torch.nn.ModuleList()
    self.dropouts = torch.nn.ModuleList()
    # self.predictions.append(nn.Linear(feature_dim_size, num_classes))  # For including feature vectors to predict graph labels???
    for _ in range(self.num_U2GNN_layers):
        self.predictions.append(nn.Linear(self.feature_dim_size, self.num_classes))
        self.dropouts.append(nn.Dropout(dropout))
Example #6
Source File: transformer.py, from pykaldi2 (MIT License)
def __init__(self, dim_feat, dim_model, nheads, dim_feedforward, nlayers,
             dropout, output_size, kernel_size=3, stride=1):
    super(TransformerAM, self).__init__()
    self.pos_encoder = PositionalEncoding(dim_model, dropout)
    self.input_layer = nn.Linear(dim_feat, dim_model)
    self.output_layer = nn.Linear(dim_model, output_size)
    encoder_norm = nn.LayerNorm(dim_model)
    encoder_layer = TransformerEncoderLayerWithConv1d(dim_model, nheads, dim_feedforward,
                                                      dropout, kernel_size, stride)
    self.transformer = nn.TransformerEncoder(encoder_layer, nlayers, norm=encoder_norm)
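A forward pass for this acoustic model would typically chain the declared modules in order. The sketch below is an assumption based only on the constructor, not code from the source file; `feats` is an assumed name.

# Hypothetical sketch -- not quoted from transformer.py (pykaldi2).
# feats is assumed to be (time, batch, dim_feat).
x = self.input_layer(feats)      # project input features to dim_model
x = self.pos_encoder(x)          # add positional encoding
x = self.transformer(x)          # encoder stack, with final LayerNorm via `norm=`
logits = self.output_layer(x)    # per-frame scores over output_size targets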
Example #7
Source File: model.py, from PyTorch (MIT License)
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
    super(TransformerModel, self).__init__()
    try:
        from torch.nn import TransformerEncoder, TransformerEncoderLayer
    except:
        raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = ninp
    self.decoder = nn.Linear(ninp, ntoken)
    self.init_weights()
Example #8
Source File: model.py, from CoupletAI (MIT License)
def __init__(self, vocab_size: int, embed_dim: int, hidden_dim: int):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embed_dim)
    self.hidden2tag = nn.Linear(hidden_dim, vocab_size)
    self.mapper = nn.Linear(embed_dim, hidden_dim)
    layer = nn.TransformerEncoderLayer(hidden_dim, 4, dim_feedforward=512)
    self.encoder = nn.TransformerEncoder(layer, 4)
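Given only this constructor, a plausible forward pass looks like the sketch below; `token_ids` is an assumed name and the layout is an assumption.

# Hypothetical sketch -- not quoted from the CoupletAI model.py.
# token_ids is assumed to be (seq_len, batch).
x = self.mapper(self.embedding(token_ids))   # (S, N, hidden_dim)
x = self.encoder(x)                          # 4-layer, 4-head encoder from above
tag_logits = self.hidden2tag(x)              # (S, N, vocab_size)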
Example #9
Source File: tk_native.py, from transformer-kernel-ranking (Apache License 2.0)
def __init__(self, _embsize: int, kernels_mu: List[float], kernels_sigma: List[float],
             att_heads: int, att_layer: int, att_proj_dim: int, att_ff_dim: int):
    super(TK_Native_v1, self).__init__()

    n_kernels = len(kernels_mu)
    if len(kernels_mu) != len(kernels_sigma):
        raise Exception("len(kernels_mu) != len(kernels_sigma)")

    # static - kernel size & magnitude variables
    self.mu = Variable(torch.cuda.FloatTensor(kernels_mu), requires_grad=False).view(1, 1, 1, n_kernels)
    self.sigma = Variable(torch.cuda.FloatTensor(kernels_sigma), requires_grad=False).view(1, 1, 1, n_kernels)
    self.nn_scaler = nn.Parameter(torch.full([1], 0.01, dtype=torch.float32, requires_grad=True))
    self.mixer = nn.Parameter(torch.full([1, 1, 1], 0.5, dtype=torch.float32, requires_grad=True))

    encoder_layer = nn.TransformerEncoderLayer(_embsize, att_heads, dim_feedforward=att_ff_dim, dropout=0)
    self.contextualizer = nn.TransformerEncoder(encoder_layer, att_layer, norm=None)

    # this does not really do "attention" - just a plain cosine matrix calculation (without learnable weights)
    self.cosine_module = CosineMatrixAttention()

    # bias is set to True in original code (we found it to not help, how could it?)
    self.dense = nn.Linear(n_kernels, 1, bias=False)
    self.dense_mean = nn.Linear(n_kernels, 1, bias=False)
    self.dense_comb = nn.Linear(2, 1, bias=False)

    # init with small weights, otherwise the dense output is way too high for the tanh -> resulting in loss == 1 all the time
    torch.nn.init.uniform_(self.dense.weight, -0.014, 0.014)       # inits taken from matchzoo
    torch.nn.init.uniform_(self.dense_mean.weight, -0.014, 0.014)  # inits taken from matchzoo
    #self.dense.bias.data.fill_(0.0)
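The learnable `mixer` parameter is what makes this a TK-style model: at call time it blends the static embeddings with the contextualized output of the encoder stack. The sketch below illustrates that blend as an assumption based on the parameters above, not quoted code; `emb` is an assumed name.

# Hypothetical sketch -- not quoted from tk_native.py.
# emb is assumed to be (batch, seq_len, _embsize); the encoder expects
# (seq_len, batch, _embsize) by default, hence the transposes.
ctx = self.contextualizer(emb.transpose(0, 1)).transpose(0, 1)
mixed = self.mixer * emb + (1 - self.mixer) * ctx   # learnable blend, initialized at 0.5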
Example #10
Source File: pytorch_transformer_wrapper.py, from allennlp (Apache License 2.0)
def __init__(
    self,
    input_dim: int,
    num_layers: int,
    feedforward_hidden_dim: int = 2048,
    num_attention_heads: int = 8,
    positional_encoding: Optional[str] = None,
    positional_embedding_size: int = 512,
    dropout_prob: float = 0.1,
    activation: str = "relu",
) -> None:
    super().__init__()

    layer = nn.TransformerEncoderLayer(
        d_model=input_dim,
        nhead=num_attention_heads,
        dim_feedforward=feedforward_hidden_dim,
        dropout=dropout_prob,
        activation=activation,
    )
    self._transformer = nn.TransformerEncoder(layer, num_layers)
    self._input_dim = input_dim

    # initialize parameters
    # We do this before the embeddings are initialized so we get the default initialization for the embeddings.
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    if positional_encoding is None:
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = None
    elif positional_encoding == "sinusoidal":
        self._sinusoidal_positional_encoding = True
        self._positional_embedding = None
    elif positional_encoding == "embedding":
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = nn.Embedding(positional_embedding_size, input_dim)
    else:
        raise ValueError(
            "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
        )
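A wrapper like this is instantiated once per model configuration, for example as in the hypothetical sketch below; the class name PytorchTransformer is inferred from the file name and may not match the actual class.

# Hypothetical usage sketch -- class name assumed from the file name.
encoder = PytorchTransformer(
    input_dim=256,
    num_layers=4,
    num_attention_heads=8,
    positional_encoding="sinusoidal",   # None, "sinusoidal", or "embedding"
)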
Example #11
Source File: torch_transformer_encoder.py, from summarus (Apache License 2.0)
def __init__(
    self,
    input_dim: int,
    num_layers: int,
    feedforward_hidden_dim: int = 2048,
    num_attention_heads: int = 8,
    positional_encoding: Optional[str] = None,
    positional_embedding_size: int = 512,
    dropout_prob: float = 0.1,
    activation: str = "relu",
) -> None:
    super().__init__()

    layer = nn.TransformerEncoderLayer(
        d_model=input_dim,
        nhead=num_attention_heads,
        dim_feedforward=feedforward_hidden_dim,
        dropout=dropout_prob,
        activation=activation,
    )
    self._transformer = nn.TransformerEncoder(layer, num_layers)
    self._input_dim = input_dim

    # initialize parameters
    # We do this before the embeddings are initialized so we get the default initialization for the embeddings.
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    if positional_encoding is None:
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = None
    elif positional_encoding == "sinusoidal":
        self._sinusoidal_positional_encoding = True
        self._positional_embedding = None
    elif positional_encoding == "embedding":
        self._sinusoidal_positional_encoding = False
        self._positional_embedding = nn.Embedding(positional_embedding_size, input_dim)
    else:
        raise ValueError(
            "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
        )