Python tensor2tensor.models.transformer.transformer_base() Examples

The following are 30 code examples of tensor2tensor.models.transformer.transformer_base(). You can go to the original project or source file by following the reference above each example. You may also want to check out the other available functions and classes of the module tensor2tensor.models.transformer.
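All of the examples below share the same basic pattern: call transformer.transformer_base() to obtain the default Transformer hyperparameter set, override the fields you want to change, and use add_hparam() to introduce fields that do not exist yet. The sketch below illustrates that pattern; the function name my_transformer_small and the specific values are invented for illustration, and the @registry.register_hparams decorator is the usual tensor2tensor way of making such a function selectable by name (adjust to your own setup).

# Minimal usage sketch of the pattern shared by the examples below.
# Field names come from the examples; the values are placeholders.
from tensor2tensor.models import transformer
from tensor2tensor.utils import registry


@registry.register_hparams
def my_transformer_small():
  """Hypothetical small variant built on top of transformer_base()."""
  hparams = transformer.transformer_base()  # start from the defaults
  hparams.num_hidden_layers = 4             # override existing fields
  hparams.hidden_size = 256
  hparams.filter_size = 1024
  hparams.num_heads = 4
  hparams.add_hparam("block_size", 5)       # add a field that does not exist yet
  return hparams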
Example #1
Source File: transformer_vae_flow_prior.py    From tensor2tensor with Apache License 2.0
def iwslt_base():
  """Set of hyperparameters."""
  # Model architecture flags.
  hparams = transformer.transformer_base()
  hparams.num_hidden_layers = 5
  hparams.hidden_size = 256
  hparams.filter_size = 1024
  hparams.num_heads = 4
  # Other flags.
  hparams.summarize_grads = False
  hparams.summarize_vars = False
  # Optimization-related flags.
  hparams.clip_grad_norm = 1.0
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate_warmup_steps = 8000
  hparams.learning_rate = 0.2
  hparams.learning_rate_schedule = (
      "constant*linear_warmup*rsqrt_decay*rsqrt_hidden_size")
  hparams.learning_rate_constant = 2.0
  hparams.add_hparam("predict_target_length", True)
  hparams.add_hparam("lendiff_bound", 30)
  hparams = update_hparams_for_tpu(hparams)
  hparams.add_hparam("pos_attn", False)
  return hparams 
Example #2
Source File: nas_layers_test.py    From tensor2tensor with Apache License 2.0
def _apply_encoder_layer(translation_layer, output_depth, nonpadding_list):
  """Applies an encoder layer with basic arguments."""

  input_tensor = tf.random_uniform(
      [_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, _INPUT_DEPTH]) / 4.0
  nonpadding = tf.constant(nonpadding_list)
  residual_tensor = tf.random_uniform(
      [_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, output_depth])
  hparams = transformer.transformer_base()

  return translation_layer.apply_layer(
      input_tensor,
      residual_tensor,
      output_depth,
      tf.nn.relu,
      hparams,
      "",
      mask_future=False,
      nonpadding=nonpadding,
      layer_preprocess_fn=None,
      postprocess_dropout=True) 
Example #3
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h512_att512_fs1024_n6_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_medium(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams 
Example #4
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_base_bs5():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 5)
  return hparams 
Example #5
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_base_bs7():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 7)
  return hparams 
Example #6
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_base_bs8():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 8)
  return hparams 
Example #7
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_base_bs9():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 9)
  return hparams 
Example #8
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_base_bs10():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 10)
  return hparams 
Example #9
Source File: evolved_transformer.py    From BERT with Apache License 2.0
def evolved_transformer_base():
  """Base parameters for Evolved Transformer model."""
  return add_evolved_transformer_hparams(transformer.transformer_base()) 
Example #10
Source File: nas_layers_test.py    From BERT with Apache License 2.0
def _apply_decoder_layer(translation_layer, input_tensor, output_depth,
                         encoder_depth):
  """Applies an decoder layer with basic arguments."""

  residual_tensor_values = np.random.rand(
      *[_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, output_depth]) - .5
  residual_tensor = tf.constant(residual_tensor_values, dtype=tf.float32)
  encoder_output_values = np.random.rand(
      *[_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, encoder_depth]) - .5
  encoder_output = tf.constant(encoder_output_values, dtype=tf.float32)
  encoder_cell_outputs = [encoder_output] * _NUM_CELLS
  hparams = transformer.transformer_base()
  hparams.attention_dropout = 0
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(_TOTAL_SEQUENCE_LENGTH))

  output_tensor = translation_layer.apply_layer(
      input_tensor,
      residual_tensor,
      output_depth,
      None,
      hparams,
      "",
      nonpadding=None,
      mask_future=True,
      layer_preprocess_fn=None,
      postprocess_dropout=False,
      decoder_self_attention_bias=decoder_self_attention_bias,
      encoder_decoder_attention_bias=None,
      encoder_cell_outputs=encoder_cell_outputs,
      cell_number=_CELL_NUMBER)

  return output_tensor 
Example #11
Source File: nas_model_test.py    From BERT with Apache License 2.0
def _get_wrong_output_dim_decoder_hparams(self):
    tf.reset_default_graph()

    hparams = transformer.transformer_base()
    _add_transformer_branching_hparams(hparams)
    hparams.num_heads = 1
    # Purposely scale up the final embedding depth.
    wrong_output_size = _EMBEDDING_DEPTH + 1
    hparams.decoder_left_output_dims[
        -2] = hparams.decoder_left_output_dims[-2] + 1
    hparams.decoder_left_output_dims[-1] = wrong_output_size

    return hparams, wrong_output_size 
Example #12
Source File: transformer_aux.py    From training_results_v0.5 with Apache License 2.0
def transformer_aux_base():
  """Set of hyperparameters."""
  hparams = transformer.transformer_base()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2,3,4")
  return hparams 
Example #13
Source File: universal_transformer.py    From training_results_v0.5 with Apache License 2.0
def universal_transformer_small():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams 
Example #14
Source File: universal_transformer.py    From training_results_v0.5 with Apache License 2.0
def transformer_teeny():
  hparams = transformer.transformer_base()
  hparams.hidden_size = 128
  hparams.filter_size = 128
  hparams.num_heads = 2
  return hparams 
Example #15
Source File: model_biword.py    From ancient-text-restoration with Apache License 2.0
def get_learning_rate():
    hparams = transformer.transformer_base()
    return learning_rate_schedule(hparams) 
Example #16
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n8_dropout20():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.20)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams 
Example #17
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_len2048_h384_att512_fs1024_n8_dropout20():
  """Hparams for LM with regular attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.20)
  hparams.num_hidden_layers = 8
  return hparams
Example #18
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_base_bs4():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 4)
  return hparams 
Example #19
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n6_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small_lr(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams 
Example #20
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h512_att512_fs1024_n8_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_medium(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams 
Example #21
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n6_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams 
Example #22
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len1024_h384_att512_fs1024_n2_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 1024)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams 
Example #23
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n8_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams 
Example #24
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_len2048_h384_att512_fs1024_n8_dropout15():
  """Hparams for LM with regular attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.num_hidden_layers = 8
  return hparams 
Example #25
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n10_dropout30():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.3)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 10
  return hparams 
Example #26
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_dropout15_tiny():
  """Hparams for LM with relative attention, tiny transformer."""
  # hparams = transformer.transformer_base()
  hparams = transformer.transformer_tiny()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  # Need to specify num_hidden_layers
  hparams.attention_key_channels = 512
  hparams.num_hidden_layers = 8
  return hparams 
Example #27
Source File: score2perf.py    From magenta with Apache License 2.0
def score2perf_transformer_base():
  hparams = transformer.transformer_base()
  hparams.bottom['inputs'] = modalities.bottom
  return hparams 
Example #28
Source File: transformer_hparams.py    From Seq2seqChatbots with MIT License
def general_transformer_hparams():
  hparams = transformer.transformer_base()
  hparams.add_hparam('roulette', TRANSFORMER_HPARAMS['roulette_wheel'])
  hparams.add_hparam('roulette_beam_size',
                     TRANSFORMER_HPARAMS['roulette_beam_size'])

  hparams.batch_size = TRANSFORMER_HPARAMS['batch_size']
  hparams.layer_prepostprocess_dropout = TRANSFORMER_HPARAMS['layer_dropout']
  hparams.symbol_modality_num_shards = TRANSFORMER_HPARAMS['embed_num_shards']
  hparams.attention_dropout = TRANSFORMER_HPARAMS['attention_dropout']
  hparams.relu_dropout = TRANSFORMER_HPARAMS['relu_dropout']
  hparams.summarize_vars = TRANSFORMER_HPARAMS['summarize_vars']

  return hparams 
Example #29
Source File: transformer_hparams.py    From Seq2seqChatbots with MIT License
def chatbot_cornell_base():
  hparams = transformer.transformer_base()
  hparams.learning_rate_warmup_steps = 16000
  return hparams
Example #30
Source File: universal_transformer_modified.py    From Graph-Transformer with Apache License 2.0
def universal_transformer_small1():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams