Python tensor2tensor.models.transformer.transformer_base() Examples
The following are 30 code examples of tensor2tensor.models.transformer.transformer_base(), collected from open-source projects. The source file, originating project, and license are listed above each example; see the module tensor2tensor.models.transformer for the other available functions and classes.
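Most of the examples below follow the same pattern: call transformer_base() to get the published default HParams, override or add fields, and register the result under a new name so it can be selected at training time. The sketch below illustrates that pattern; the hparams-set name my_transformer_small and the specific values are illustrative only and do not come from any of the projects listed here.

# Minimal sketch of the common pattern (illustrative names and values).
from tensor2tensor.models import transformer
from tensor2tensor.utils import registry


@registry.register_hparams
def my_transformer_small():
  """Illustrative transformer_base() variant (not from the examples below)."""
  hparams = transformer.transformer_base()  # Start from the published defaults.
  hparams.hidden_size = 256                 # Override an existing field.
  hparams.num_hidden_layers = 4
  hparams.add_hparam("block_size", 4)       # Add a brand-new hyperparameter.
  return hparams

Once registered, such a set can typically be selected with --hparams_set=my_transformer_small when invoking t2t-trainer.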
Example #1
Source File: transformer_vae_flow_prior.py From tensor2tensor with Apache License 2.0
def iwslt_base():
  """Set of hyperparameters."""
  # Model architecture flags.
  hparams = transformer.transformer_base()
  hparams.num_hidden_layers = 5
  hparams.hidden_size = 256
  hparams.filter_size = 1024
  hparams.num_heads = 4
  # Other flags.
  hparams.summarize_grads = False
  hparams.summarize_vars = False
  # Optimization-related flags.
  hparams.clip_grad_norm = 1.0
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate_warmup_steps = 8000
  hparams.learning_rate = 0.2
  hparams.learning_rate_schedule = (
      "constant*linear_warmup*rsqrt_decay*rsqrt_hidden_size")
  hparams.learning_rate_constant = 2.0
  hparams.add_hparam("predict_target_length", True)
  hparams.add_hparam("lendiff_bound", 30)
  hparams = update_hparams_for_tpu(hparams)
  hparams.add_hparam("pos_attn", False)
  return hparams
Example #2
Source File: nas_layers_test.py From tensor2tensor with Apache License 2.0
def _apply_encoder_layer(translation_layer, output_depth, nonpadding_list):
  """Applies an encoder layer with basic arguments."""
  input_tensor = tf.random_uniform(
      [_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, _INPUT_DEPTH]) / 4.0
  nonpadding = tf.constant(nonpadding_list)
  residual_tensor = tf.random_uniform(
      [_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, output_depth])
  hparams = transformer.transformer_base()
  return translation_layer.apply_layer(
      input_tensor,
      residual_tensor,
      output_depth,
      tf.nn.relu,
      hparams,
      "",
      mask_future=False,
      nonpadding=nonpadding,
      layer_preprocess_fn=None,
      postprocess_dropout=True)
Example #3
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h512_att512_fs1024_n6_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_medium(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #4
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs5():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 5)
  return hparams
Example #5
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs7():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 7)
  return hparams
Example #6
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs8():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 8)
  return hparams
Example #7
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs9():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 9)
  return hparams
Example #8
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs10():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 10)
  return hparams
Example #9
Source File: evolved_transformer.py From BERT with Apache License 2.0
def evolved_transformer_base():
  """Base parameters for Evolved Transformer model."""
  return add_evolved_transformer_hparams(transformer.transformer_base())
Example #10
Source File: nas_layers_test.py From BERT with Apache License 2.0
def _apply_decoder_layer(translation_layer, input_tensor, output_depth,
                         encoder_depth):
  """Applies a decoder layer with basic arguments."""
  residual_tensor_values = np.random.rand(
      *[_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, output_depth]) - .5
  residual_tensor = tf.constant(residual_tensor_values, dtype=tf.float32)
  encoder_output_values = np.random.rand(
      *[_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, encoder_depth]) - .5
  encoder_output = tf.constant(encoder_output_values, dtype=tf.float32)
  encoder_cell_outputs = [encoder_output] * _NUM_CELLS
  hparams = transformer.transformer_base()
  hparams.attention_dropout = 0
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(_TOTAL_SEQUENCE_LENGTH))
  output_tensor = translation_layer.apply_layer(
      input_tensor,
      residual_tensor,
      output_depth,
      None,
      hparams,
      "",
      nonpadding=None,
      mask_future=True,
      layer_preprocess_fn=None,
      postprocess_dropout=False,
      decoder_self_attention_bias=decoder_self_attention_bias,
      encoder_decoder_attention_bias=None,
      encoder_cell_outputs=encoder_cell_outputs,
      cell_number=_CELL_NUMBER)
  return output_tensor
Example #11
Source File: nas_model_test.py From BERT with Apache License 2.0
def _get_wrong_output_dim_decoder_hparams(self):
  tf.reset_default_graph()
  hparams = transformer.transformer_base()
  _add_transformer_branching_hparams(hparams)
  hparams.num_heads = 1
  # Purposely scale up the final embedding depth.
  wrong_output_size = _EMBEDDING_DEPTH + 1
  hparams.decoder_left_output_dims[-2] = (
      hparams.decoder_left_output_dims[-2] + 1)
  hparams.decoder_left_output_dims[-1] = wrong_output_size
  return hparams, wrong_output_size
Example #12
Source File: transformer_aux.py From training_results_v0.5 with Apache License 2.0
def transformer_aux_base():
  """Set of hyperparameters."""
  hparams = transformer.transformer_base()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2,3,4")
  return hparams
Example #13
Source File: universal_transformer.py From training_results_v0.5 with Apache License 2.0
def universal_transformer_small():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams
Example #14
Source File: universal_transformer.py From training_results_v0.5 with Apache License 2.0
def transformer_teeny():
  hparams = transformer.transformer_base()
  hparams.hidden_size = 128
  hparams.filter_size = 128
  hparams.num_heads = 2
  return hparams
Example #15
Source File: model_biword.py From ancient-text-restoration with Apache License 2.0
def get_learning_rate():
  hparams = transformer.transformer_base()
  return learning_rate_schedule(hparams)
Example #16
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n8_dropout20():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.20)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams
Example #17
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_len2048_h384_att512_fs1024_n8_dropout20():
  """Hparams for LM with regular attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.20)
  hparams.num_hidden_layers = 8
  return hparams


#============= d10 ==================
Example #18
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs4():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 4)
  return hparams
Example #19
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n6_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small_lr(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #20
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h512_att512_fs1024_n8_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_medium(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams
Example #21
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n6_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #22
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len1024_h384_att512_fs1024_n2_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 1024)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #23
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n8_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams
Example #24
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_len2048_h384_att512_fs1024_n8_dropout15():
  """Hparams for LM with regular attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.num_hidden_layers = 8
  return hparams
Example #25
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n10_dropout30():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.3)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 10
  return hparams
Example #26
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_dropout15_tiny():
  """Hparams for LM with relative attention, tiny transformer."""
  # hparams = transformer.transformer_base()
  hparams = transformer.transformer_tiny()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  # Need to specify num_hidden_layers
  hparams.attention_key_channels = 512
  hparams.num_hidden_layers = 8
  return hparams
Example #27
Source File: score2perf.py From magenta with Apache License 2.0
def score2perf_transformer_base():
  hparams = transformer.transformer_base()
  hparams.bottom['inputs'] = modalities.bottom
  return hparams
Example #28
Source File: transformer_hparams.py From Seq2seqChatbots with MIT License
def general_transformer_hparams():
  hparams = transformer.transformer_base()
  hparams.add_hparam('roulette', TRANSFORMER_HPARAMS['roulette_wheel'])
  hparams.add_hparam('roulette_beam_size',
                     TRANSFORMER_HPARAMS['roulette_beam_size'])
  hparams.batch_size = TRANSFORMER_HPARAMS['batch_size']
  hparams.layer_prepostprocess_dropout = TRANSFORMER_HPARAMS['layer_dropout']
  hparams.symbol_modality_num_shards = TRANSFORMER_HPARAMS['embed_num_shards']
  hparams.attention_dropout = TRANSFORMER_HPARAMS['attention_dropout']
  hparams.relu_dropout = TRANSFORMER_HPARAMS['relu_dropout']
  hparams.summarize_vars = TRANSFORMER_HPARAMS['summarize_vars']
  return hparams
Example #29
Source File: transformer_hparams.py From Seq2seqChatbots with MIT License
def chatbot_cornell_base():
  hparams = transformer.transformer_base()
  hparams.learning_rate_warmup_steps = 16000
  return hparams


# Different batch sizes.
Example #30
Source File: universal_transformer_modified.py From Graph-Transformer with Apache License 2.0
def universal_transformer_small1():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams