Python tensor2tensor.models.transformer.transformer_base() Examples
The following are 30 code examples of tensor2tensor.models.transformer.transformer_base(), collected from open-source projects. The source file, originating project, and license are listed above each example; see the module tensor2tensor.models.transformer for the other available functions and classes.
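Most of the examples below follow the same pattern: call transformer_base() to get the published default HParams, override or add fields, and register the result under a new name so it can be selected at training time. The sketch below illustrates that pattern; the hparams-set name my_transformer_small and the specific values are illustrative only and do not come from any of the projects listed here.

# Minimal sketch of the common pattern (illustrative names and values).
from tensor2tensor.models import transformer
from tensor2tensor.utils import registry


@registry.register_hparams
def my_transformer_small():
  """Illustrative transformer_base() variant (not from the examples below)."""
  hparams = transformer.transformer_base()  # Start from the published defaults.
  hparams.hidden_size = 256                 # Override an existing field.
  hparams.num_hidden_layers = 4
  hparams.add_hparam("block_size", 4)       # Add a brand-new hyperparameter.
  return hparams

Once registered, such a set can typically be selected with --hparams_set=my_transformer_small when invoking t2t-trainer.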
Example #1
Source File: transformer_vae_flow_prior.py From tensor2tensor with Apache License 2.0
def iwslt_base():
  """Set of hyperparameters."""
  # Model architecture flags.
  hparams = transformer.transformer_base()
  hparams.num_hidden_layers = 5
  hparams.hidden_size = 256
  hparams.filter_size = 1024
  hparams.num_heads = 4
  # Other flags.
  hparams.summarize_grads = False
  hparams.summarize_vars = False
  # Optimization-related flags.
  hparams.clip_grad_norm = 1.0
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate_warmup_steps = 8000
  hparams.learning_rate = 0.2
  hparams.learning_rate_schedule = (
      "constant*linear_warmup*rsqrt_decay*rsqrt_hidden_size")
  hparams.learning_rate_constant = 2.0
  hparams.add_hparam("predict_target_length", True)
  hparams.add_hparam("lendiff_bound", 30)
  hparams = update_hparams_for_tpu(hparams)
  hparams.add_hparam("pos_attn", False)
  return hparams
Example #2
Source File: nas_layers_test.py From tensor2tensor with Apache License 2.0
def _apply_encoder_layer(translation_layer, output_depth, nonpadding_list):
  """Applies an encoder layer with basic arguments."""
  input_tensor = tf.random_uniform(
      [_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, _INPUT_DEPTH]) / 4.0
  nonpadding = tf.constant(nonpadding_list)
  residual_tensor = tf.random_uniform(
      [_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, output_depth])
  hparams = transformer.transformer_base()
  return translation_layer.apply_layer(
      input_tensor,
      residual_tensor,
      output_depth,
      tf.nn.relu,
      hparams,
      "",
      mask_future=False,
      nonpadding=nonpadding,
      layer_preprocess_fn=None,
      postprocess_dropout=True)
Example #3
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h512_att512_fs1024_n6_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_medium(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #4
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs5():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 5)
  return hparams
Example #5
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs7():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 7)
  return hparams
Example #6
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs8():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 8)
  return hparams
Example #7
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs9():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 9)
  return hparams
Example #8
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs10():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 10)
  return hparams
Example #9
Source File: evolved_transformer.py From BERT with Apache License 2.0
def evolved_transformer_base():
  """Base parameters for Evolved Transformer model."""
  return add_evolved_transformer_hparams(transformer.transformer_base())
Example #10
Source File: nas_layers_test.py From BERT with Apache License 2.0
def _apply_decoder_layer(translation_layer, input_tensor, output_depth,
                         encoder_depth):
  """Applies a decoder layer with basic arguments."""
  residual_tensor_values = np.random.rand(
      *[_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, output_depth]) - .5
  residual_tensor = tf.constant(residual_tensor_values, dtype=tf.float32)
  encoder_output_values = np.random.rand(
      *[_BATCH_SIZE, _TOTAL_SEQUENCE_LENGTH, encoder_depth]) - .5
  encoder_output = tf.constant(encoder_output_values, dtype=tf.float32)
  encoder_cell_outputs = [encoder_output] * _NUM_CELLS
  hparams = transformer.transformer_base()
  hparams.attention_dropout = 0
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(_TOTAL_SEQUENCE_LENGTH))
  output_tensor = translation_layer.apply_layer(
      input_tensor,
      residual_tensor,
      output_depth,
      None,
      hparams,
      "",
      nonpadding=None,
      mask_future=True,
      layer_preprocess_fn=None,
      postprocess_dropout=False,
      decoder_self_attention_bias=decoder_self_attention_bias,
      encoder_decoder_attention_bias=None,
      encoder_cell_outputs=encoder_cell_outputs,
      cell_number=_CELL_NUMBER)
  return output_tensor
Example #11
Source File: nas_model_test.py From BERT with Apache License 2.0
def _get_wrong_output_dim_decoder_hparams(self):
  tf.reset_default_graph()
  hparams = transformer.transformer_base()
  _add_transformer_branching_hparams(hparams)
  hparams.num_heads = 1
  # Purposely scale up the final embedding depth.
  wrong_output_size = _EMBEDDING_DEPTH + 1
  hparams.decoder_left_output_dims[-2] = (
      hparams.decoder_left_output_dims[-2] + 1)
  hparams.decoder_left_output_dims[-1] = wrong_output_size
  return hparams, wrong_output_size
Example #12
Source File: transformer_aux.py From training_results_v0.5 with Apache License 2.0
def transformer_aux_base():
  """Set of hyperparameters."""
  hparams = transformer.transformer_base()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2,3,4")
  return hparams
Example #13
Source File: universal_transformer.py From training_results_v0.5 with Apache License 2.0
def universal_transformer_small():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams
Example #14
Source File: universal_transformer.py From training_results_v0.5 with Apache License 2.0
def transformer_teeny():
  hparams = transformer.transformer_base()
  hparams.hidden_size = 128
  hparams.filter_size = 128
  hparams.num_heads = 2
  return hparams
Example #15
Source File: model_biword.py From ancient-text-restoration with Apache License 2.0
def get_learning_rate():
  hparams = transformer.transformer_base()
  return learning_rate_schedule(hparams)
Example #16
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n8_dropout20():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.20)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams
Example #17
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_len2048_h384_att512_fs1024_n8_dropout20():
  """Hparams for LM with regular attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.20)
  hparams.num_hidden_layers = 8
  return hparams


#============= d10 ==================
Example #18
Source File: transformer_parallel.py From BERT with Apache License 2.0
def transformer_base_bs4():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 4)
  return hparams
Example #19
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n6_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small_lr(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #20
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h512_att512_fs1024_n8_dropout10():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_medium(hparams)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams
Example #21
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n6_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #22
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len1024_h384_att512_fs1024_n2_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 1024)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 6
  return hparams
Example #23
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n8_dropout15():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 8
  return hparams
Example #24
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_len2048_h384_att512_fs1024_n8_dropout15():
  """Hparams for LM with regular attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.15)
  hparams.num_hidden_layers = 8
  return hparams
Example #25
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_h384_att512_fs1024_n10_dropout30():
  """Hparams for LM with relative attention."""
  hparams = transformer.transformer_base()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_small(hparams)
  update_dropout(hparams, 0.3)
  hparams.self_attention_type = "dot_product_relative_v2"
  hparams.num_hidden_layers = 10
  return hparams
Example #26
Source File: score2perf_hparams.py From magenta with Apache License 2.0
def t_rel_len2048_dropout15_tiny():
  """Hparams for LM with relative attention, tiny transformer."""
  # hparams = transformer.transformer_base()
  hparams = transformer.transformer_tiny()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  # Need to specify num_hidden_layers
  hparams.attention_key_channels = 512
  hparams.num_hidden_layers = 8
  return hparams
Example #27
Source File: score2perf.py From magenta with Apache License 2.0
def score2perf_transformer_base():
  hparams = transformer.transformer_base()
  hparams.bottom['inputs'] = modalities.bottom
  return hparams
Example #28
Source File: transformer_hparams.py From Seq2seqChatbots with MIT License
def general_transformer_hparams():
  hparams = transformer.transformer_base()
  hparams.add_hparam('roulette', TRANSFORMER_HPARAMS['roulette_wheel'])
  hparams.add_hparam('roulette_beam_size',
                     TRANSFORMER_HPARAMS['roulette_beam_size'])
  hparams.batch_size = TRANSFORMER_HPARAMS['batch_size']
  hparams.layer_prepostprocess_dropout = TRANSFORMER_HPARAMS['layer_dropout']
  hparams.symbol_modality_num_shards = TRANSFORMER_HPARAMS['embed_num_shards']
  hparams.attention_dropout = TRANSFORMER_HPARAMS['attention_dropout']
  hparams.relu_dropout = TRANSFORMER_HPARAMS['relu_dropout']
  hparams.summarize_vars = TRANSFORMER_HPARAMS['summarize_vars']
  return hparams
Example #29
Source File: transformer_hparams.py From Seq2seqChatbots with MIT License
def chatbot_cornell_base():
  hparams = transformer.transformer_base()
  hparams.learning_rate_warmup_steps = 16000
  return hparams


# Different batch sizes.
Example #30
Source File: universal_transformer_modified.py From Graph-Transformer with Apache License 2.0
def universal_transformer_small1():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams