Python tensor2tensor.models.transformer.transformer_tiny() Examples

The following are 27 code examples of tensor2tensor.models.transformer.transformer_tiny(), drawn from open-source projects. Each example notes the source file and project it comes from; you may also want to browse the other functions and classes of the module tensor2tensor.models.transformer.
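transformer_tiny() is a registered hparams set: it returns an HParams object with a small Transformer configuration that callers typically copy and override before building a model. As a minimal, hedged sketch (the helper name here is invented for illustration, and the override values simply mirror the ones used by the get_model test helpers in the examples below):

from tensor2tensor.models import transformer

def tiny_test_hparams():
  """Illustrative only: fetch the tiny hparams set and shrink it further."""
  hparams = transformer.transformer_tiny()
  # Override a few fields, as the test helpers in the examples below do.
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0
  return hparams

The resulting hparams object is then passed, together with problem hparams, to the model constructor (e.g. transformer.Transformer(hparams, mode, p_hparams)), as the get_model helpers in Examples #6, #9, and #27 show.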
Example #1
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def _create_greedy_infer_model(self):
    """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: A map of string to tensor.
    """
    model, features = get_model(transformer.transformer_tiny())

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    return model, features 
Example #2
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def _create_greedy_infer_model(self):
    """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: A map of string to tensor.
    """
    model, features = get_model(transformer.transformer_tiny())

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    return model, features 
Example #3
Source File: universal_transformer.py    From BERT with Apache License 2.0
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #4
Source File: universal_transformer_modified.py    From Graph-Transformer with Apache License 2.0
def universal_transformer_tiny1():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #5
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_dropout15_tiny():
  """Hparams for LM with relative attention, tiny transformer."""
  # hparams = transformer.transformer_base()
  hparams = transformer.transformer_tiny()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  # Need to specify num_hidden_layers
  hparams.attention_key_channels = 512
  hparams.num_hidden_layers = 8
  return hparams 
Example #6
Source File: transformer_test.py    From training_results_v0.5 with Apache License 2.0
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE,
                                                   VOCAB_SIZE,
                                                   hparams)
  if not has_input:
    del p_hparams.modality["inputs"]
  hparams.problem_hparams = p_hparams

  inputs = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features 
Example #7
Source File: universal_transformer.py    From training_results_v0.5 with Apache License 2.0
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #8
Source File: transformer_aux.py    From training_results_v0.5 with Apache License 2.0
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams 
Example #9
Source File: transformer_test.py    From BERT with Apache License 2.0
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE,
                                                   VOCAB_SIZE,
                                                   hparams)
  if not has_input:
    del p_hparams.modality["inputs"]
  hparams.problem_hparams = p_hparams

  inputs = np.random.randint(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = np.random.randint(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features 
Example #10
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def testBeamVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      beam_result = model._beam_decode_slow(
          features,
          decode_length,
          beam_size=4,
          top_beams=1,
          alpha=1.0)["outputs"]

      fast_result = model._beam_decode(
          features,
          decode_length,
          beam_size=4,
          top_beams=1,
          alpha=1.0)["outputs"]

    with self.test_session():
      beam_res = beam_result.eval()
      fast_res = fast_result.eval()

    self.assertAllClose(beam_res, fast_res) 
Example #11
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def testSlowVsFastNoInput(self):
    model, features = get_model(
        transformer.transformer_tiny(), has_input=False)

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      slow_result = model._slow_greedy_infer(
          features, decode_length)["outputs"]
      slow_result = tf.squeeze(slow_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      slow_res = slow_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
    self.assertAllClose(slow_res, fast_res) 
Example #12
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def testSlowVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      greedy_result = model._slow_greedy_infer(
          features, decode_length)["outputs"]
      greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      greedy_res = greedy_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length))
    self.assertAllClose(greedy_res, fast_res) 
Example #13
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_tiny_bs3():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 3)
  return hparams 
Example #14
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_tiny_bs2():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 2)
  return hparams 
Example #15
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_tiny_bs1():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 1)
  return hparams 
Example #16
Source File: universal_transformer.py    From fine-lm with MIT License
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #17
Source File: transformer_aux.py    From BERT with Apache License 2.0
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams 
Example #18
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testBeamVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      beam_result = model._beam_decode_slow(
          features, decode_length, beam_size=4, top_beams=1,
          alpha=1.0)["outputs"]

      fast_result = model._beam_decode(
          features, decode_length, beam_size=4, top_beams=1,
          alpha=1.0)["outputs"]

    with self.test_session():
      beam_res = beam_result.eval()
      fast_res = fast_result.eval()

    self.assertAllClose(beam_res, fast_res) 
Example #19
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFastNoInput(self):
    model, features = get_model(transformer.transformer_tiny(), has_input=False)

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      slow_result = model._slow_greedy_infer(features, decode_length)["outputs"]
      slow_result = tf.squeeze(slow_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      slow_res = slow_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
    self.assertAllClose(slow_res, fast_res) 
Example #20
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFast(self):
    tf.set_random_seed(1234)
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      greedy_result = model._slow_greedy_infer(features,
                                               decode_length)["outputs"]
      greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      greedy_res = greedy_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length))
    self.assertAllClose(greedy_res, fast_res) 
Example #21
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testEvolvedTransformer(self):
    model, features = get_model(hparams=transformer.transformer_tiny())
    logits, _ = model(features)
    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) 
Example #22
Source File: transformer_parallel.py    From tensor2tensor with Apache License 2.0
def transformer_tiny_bs3():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 3)
  return hparams 
Example #23
Source File: transformer_parallel.py    From tensor2tensor with Apache License 2.0
def transformer_tiny_bs2():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 2)
  return hparams 
Example #24
Source File: transformer_parallel.py    From tensor2tensor with Apache License 2.0
def transformer_tiny_bs1():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 1)
  return hparams 
Example #25
Source File: universal_transformer.py    From tensor2tensor with Apache License 2.0
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #26
Source File: transformer_aux.py    From tensor2tensor with Apache License 2.0
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams 
Example #27
Source File: transformer_test.py    From fine-lm with MIT License
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE, VOCAB_SIZE)
  if not has_input:
    p_hparams.input_modality = {}
  hparams.problem_hparams = p_hparams

  inputs = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features