Python tensor2tensor.models.transformer.transformer_ffn_layer() Examples
The following are 14 code examples of tensor2tensor.models.transformer.transformer_ffn_layer().
Each example is taken from an open-source project; the source file, project, and license are listed above its code.
You may also want to check out all available functions and classes of the module tensor2tensor.models.transformer.
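Before diving into the examples, here is a minimal sketch (not taken from any of the projects below) of how transformer_ffn_layer() is commonly wrapped between layer_preprocess and layer_postprocess. It assumes TF1-style graph mode and the transformer_base hyperparameter set; the tensor shapes are illustrative only.

import tensorflow as tf
from tensor2tensor.layers import common_layers
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()  # standard base hparams set

# Dummy input of shape [batch_size, length, hidden_size]; shapes are illustrative.
x = tf.zeros([8, 20, hparams.hidden_size])

with tf.variable_scope("ffn"):
  # Pre-normalize, apply the feed-forward layer, then apply the residual
  # and post-processing prescribed by hparams.
  y = transformer.transformer_ffn_layer(
      common_layers.layer_preprocess(x, hparams), hparams)
  x = common_layers.layer_postprocess(x, y, hparams)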
Example #1
Source File: universal_transformer_util.py From fine-lm with MIT License | 5 votes |
def transformer_decoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None):
  """Applies a feed-forward function which is parametrised for decoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          conv_padding="LEFT",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="LEFT",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
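In these Universal Transformer utilities, hparams.transformer_ffn_type selects the feed-forward variant: "fc" delegates to transformer.transformer_ffn_layer() with conv_padding="LEFT" so that convolutional feed-forward variants stay causal in the decoder, while "sepconv" uses the separable-convolution block common_layers.sepconv_relu_sepconv. In both cases the result is wrapped between layer_preprocess and layer_postprocess.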
Example #2
Source File: universal_transformer_util.py From tensor2tensor with Apache License 2.0 | 5 votes |
def transformer_decoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None):
  """Applies a feed-forward function which is parametrised for decoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          conv_padding="LEFT",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="LEFT",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #3
Source File: universal_transformer_util.py From BERT with Apache License 2.0 | 5 votes |
def transformer_decoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None):
  """Applies a feed-forward function which is parametrised for decoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          conv_padding="LEFT",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="LEFT",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #4
Source File: universal_transformer_util.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def transformer_decoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None):
  """Applies a feed-forward function which is parametrised for decoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          conv_padding="LEFT",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="LEFT",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #5
Source File: universal_transformer_modified_utils.py From Graph-Transformer with Apache License 2.0 | 5 votes |
def transformer_decoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None):
  """Applies a feed-forward function which is parametrised for decoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          conv_padding="LEFT",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="LEFT",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #6
Source File: transformer_revnet.py From fine-lm with MIT License | 4 votes |
def transformer_revnet_encoder(encoder_input,
                               encoder_self_attention_bias,
                               hparams,
                               name="encoder"):
  """A stack of transformer layers.

  Args:
    encoder_input: a Tensor
    encoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """

  def f(x, side_input):
    """f(x) for reversible layer, self-attention layer."""
    encoder_self_attention_bias = side_input[0]
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("self_attention"):
      y = common_attention.multihead_attention(
          common_layers.layer_preprocess(x, hparams), None,
          encoder_self_attention_bias,
          hparams.attention_key_channels or hparams.hidden_size,
          hparams.attention_value_channels or hparams.hidden_size,
          hparams.hidden_size, hparams.num_heads, hparams.attention_dropout)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  def g(x):
    """g(x) for reversible layer, feed-forward layer."""
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("ffn"):
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams), hparams)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  x1, x2 = tf.split(encoder_input, 2, axis=-1)

  with tf.variable_scope(name):
    y1, y2 = rev_block.rev_block(
        x1,
        x2,
        f,
        g,
        num_layers=hparams.num_hidden_layers,
        f_side_input=[encoder_self_attention_bias],
        is_training=hparams.mode == tf.estimator.ModeKeys.TRAIN)
    y = tf.concat([y1, y2], axis=-1)

  return common_layers.layer_preprocess(y, hparams)
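Note the reversible-layer bookkeeping: rev_block splits the encoder input into two halves x1 and x2 along the channel dimension, so f (the self-attention branch) and g (the transformer_ffn_layer feed-forward branch) each temporarily halve hparams.hidden_size to match the split and restore it before returning. This lets activations be recomputed during the backward pass instead of stored, at the cost of mutating hparams in place.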
Example #7
Source File: universal_transformer_util.py From fine-lm with MIT License | 4 votes |
def transformer_encoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None,
                                 pad_remover=None):
  """Applies a feed-forward function which is parametrised for encoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.
    pad_remover: to mask out padding in convolutional layers (efficiency).

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          pad_remover,
          conv_padding="SAME",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      assert nonpadding_mask is not None, (
          "The nonpadding_mask should be provided, otherwise the model uses "
          "the leaked padding information to estimate the length!")
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="SAME",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
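Compared with the decoder variant in the earlier examples, the encoder version passes pad_remover through to transformer_ffn_layer(), uses conv_padding="SAME" since the encoder has no causality constraint, and asserts that nonpadding_mask is provided for the "sepconv" branch so the model cannot exploit leaked padding information to estimate sequence length.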
Example #8
Source File: transformer_revnet.py From tensor2tensor with Apache License 2.0 | 4 votes |
def transformer_revnet_encoder(encoder_input,
                               encoder_self_attention_bias,
                               hparams,
                               name="encoder"):
  """A stack of transformer layers.

  Args:
    encoder_input: a Tensor
    encoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """

  def f(x, side_input):
    """f(x) for reversible layer, self-attention layer."""
    encoder_self_attention_bias = side_input[0]
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("self_attention"):
      y = common_attention.multihead_attention(
          common_layers.layer_preprocess(x, hparams), None,
          encoder_self_attention_bias,
          hparams.attention_key_channels or hparams.hidden_size,
          hparams.attention_value_channels or hparams.hidden_size,
          hparams.hidden_size, hparams.num_heads, hparams.attention_dropout)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  def g(x):
    """g(x) for reversible layer, feed-forward layer."""
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("ffn"):
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams), hparams)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  x1, x2 = tf.split(encoder_input, 2, axis=-1)

  with tf.variable_scope(name):
    y1, y2 = contrib.layers().rev_block(
        x1,
        x2,
        f,
        g,
        num_layers=hparams.num_hidden_layers,
        f_side_input=[encoder_self_attention_bias],
        is_training=hparams.mode == tf.estimator.ModeKeys.TRAIN)
    y = tf.concat([y1, y2], axis=-1)

  return common_layers.layer_preprocess(y, hparams)
Example #9
Source File: universal_transformer_util.py From tensor2tensor with Apache License 2.0 | 4 votes |
def transformer_encoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None,
                                 pad_remover=None):
  """Applies a feed-forward function which is parametrised for encoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.
    pad_remover: to mask out padding in convolutional layers (efficiency).

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          pad_remover,
          conv_padding="SAME",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      assert nonpadding_mask is not None, (
          "The nonpadding_mask should be provided, otherwise the model uses "
          "the leaked padding information to estimate the length!")
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="SAME",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #10
Source File: transformer_revnet.py From BERT with Apache License 2.0 | 4 votes |
def transformer_revnet_encoder(encoder_input,
                               encoder_self_attention_bias,
                               hparams,
                               name="encoder"):
  """A stack of transformer layers.

  Args:
    encoder_input: a Tensor
    encoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """

  def f(x, side_input):
    """f(x) for reversible layer, self-attention layer."""
    encoder_self_attention_bias = side_input[0]
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("self_attention"):
      y = common_attention.multihead_attention(
          common_layers.layer_preprocess(x, hparams), None,
          encoder_self_attention_bias,
          hparams.attention_key_channels or hparams.hidden_size,
          hparams.attention_value_channels or hparams.hidden_size,
          hparams.hidden_size, hparams.num_heads, hparams.attention_dropout)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  def g(x):
    """g(x) for reversible layer, feed-forward layer."""
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("ffn"):
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams), hparams)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  x1, x2 = tf.split(encoder_input, 2, axis=-1)

  with tf.variable_scope(name):
    y1, y2 = tf.contrib.layers.rev_block(
        x1,
        x2,
        f,
        g,
        num_layers=hparams.num_hidden_layers,
        f_side_input=[encoder_self_attention_bias],
        is_training=hparams.mode == tf.estimator.ModeKeys.TRAIN)
    y = tf.concat([y1, y2], axis=-1)

  return common_layers.layer_preprocess(y, hparams)
Example #11
Source File: universal_transformer_util.py From BERT with Apache License 2.0 | 4 votes |
def transformer_encoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None,
                                 pad_remover=None):
  """Applies a feed-forward function which is parametrised for encoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.
    pad_remover: to mask out padding in convolutional layers (efficiency).

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          pad_remover,
          conv_padding="SAME",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      assert nonpadding_mask is not None, (
          "The nonpadding_mask should be provided, otherwise the model uses "
          "the leaked padding information to estimate the length!")
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="SAME",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #12
Source File: transformer_revnet.py From training_results_v0.5 with Apache License 2.0 | 4 votes |
def transformer_revnet_encoder(encoder_input,
                               encoder_self_attention_bias,
                               hparams,
                               name="encoder"):
  """A stack of transformer layers.

  Args:
    encoder_input: a Tensor
    encoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """

  def f(x, side_input):
    """f(x) for reversible layer, self-attention layer."""
    encoder_self_attention_bias = side_input[0]
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("self_attention"):
      y = common_attention.multihead_attention(
          common_layers.layer_preprocess(x, hparams), None,
          encoder_self_attention_bias,
          hparams.attention_key_channels or hparams.hidden_size,
          hparams.attention_value_channels or hparams.hidden_size,
          hparams.hidden_size, hparams.num_heads, hparams.attention_dropout)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  def g(x):
    """g(x) for reversible layer, feed-forward layer."""
    old_hid_size = hparams.hidden_size
    hparams.hidden_size = old_hid_size // 2

    with tf.variable_scope("ffn"):
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams), hparams)
      y = common_layers.layer_postprocess(x, y, hparams)
    hparams.hidden_size = old_hid_size
    return y

  x1, x2 = tf.split(encoder_input, 2, axis=-1)

  with tf.variable_scope(name):
    y1, y2 = tf.contrib.layers.rev_block(
        x1,
        x2,
        f,
        g,
        num_layers=hparams.num_hidden_layers,
        f_side_input=[encoder_self_attention_bias],
        is_training=hparams.mode == tf.estimator.ModeKeys.TRAIN)
    y = tf.concat([y1, y2], axis=-1)

  return common_layers.layer_preprocess(y, hparams)
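The four transformer_revnet_encoder examples are otherwise identical and differ only in where rev_block is imported from (a standalone rev_block module, contrib.layers(), or tf.contrib.layers), reflecting the tensor2tensor and TensorFlow versions targeted by each project.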
Example #13
Source File: universal_transformer_util.py From training_results_v0.5 with Apache License 2.0 | 4 votes |
def transformer_encoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None,
                                 pad_remover=None):
  """Applies a feed-forward function which is parametrised for encoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.
    pad_remover: to mask out padding in convolutional layers (efficiency).

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          pad_remover,
          conv_padding="SAME",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      assert nonpadding_mask is not None, (
          "The nonpadding_mask should be provided, otherwise the model uses "
          "the leaked padding information to estimate the length!")
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="SAME",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x
Example #14
Source File: universal_transformer_modified_utils.py From Graph-Transformer with Apache License 2.0 | 4 votes |
def transformer_encoder_ffn_unit(x,
                                 hparams,
                                 nonpadding_mask=None,
                                 pad_remover=None):
  """Applies a feed-forward function which is parametrised for encoding.

  Args:
    x: input
    hparams: model hyper-parameters
    nonpadding_mask: optional Tensor with shape [batch_size, encoder_length]
      indicating what positions are not padding. This is used to mask out
      padding in convolutional layers. We generally only need this mask for
      "packed" datasets, because for ordinary datasets, no padding is ever
      followed by nonpadding.
    pad_remover: to mask out padding in convolutional layers (efficiency).

  Returns:
    the output tensor
  """

  with tf.variable_scope("ffn"):
    if hparams.transformer_ffn_type == "fc":
      y = transformer.transformer_ffn_layer(
          common_layers.layer_preprocess(x, hparams),
          hparams,
          pad_remover,
          conv_padding="SAME",
          nonpadding_mask=nonpadding_mask)

    if hparams.transformer_ffn_type == "sepconv":
      assert nonpadding_mask is not None, (
          "The nonpadding_mask should be provided, otherwise the model uses "
          "the leaked padding information to estimate the length!")
      y = common_layers.sepconv_relu_sepconv(
          common_layers.layer_preprocess(x, hparams),
          filter_size=hparams.filter_size,
          output_size=hparams.hidden_size,
          first_kernel_size=(3, 1),
          second_kernel_size=(5, 1),
          padding="SAME",
          nonpadding_mask=nonpadding_mask,
          dropout=hparams.relu_dropout)

    x = common_layers.layer_postprocess(x, y, hparams)

  return x