Python tensor2tensor.models.transformer.transformer_tiny() Examples

The following are 27 code examples of tensor2tensor.models.transformer.transformer_tiny(), drawn from open-source projects. Each example notes the source file and project it comes from; you may also want to browse the other functions and classes of the module tensor2tensor.models.transformer.
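transformer_tiny() is a registered hparams set: it returns an HParams object with a small Transformer configuration that callers typically copy and override before building a model. As a minimal, hedged sketch (the helper name here is invented for illustration, and the override values simply mirror the ones used by the get_model test helpers in the examples below):

from tensor2tensor.models import transformer

def tiny_test_hparams():
  """Illustrative only: fetch the tiny hparams set and shrink it further."""
  hparams = transformer.transformer_tiny()
  # Override a few fields, as the test helpers in the examples below do.
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0
  return hparams

The resulting hparams object is then passed, together with problem hparams, to the model constructor (e.g. transformer.Transformer(hparams, mode, p_hparams)), as the get_model helpers in Examples #6, #9, and #27 show.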
Example #1
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def _create_greedy_infer_model(self):
    """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: A map of string to tensor.
    """
    model, features = get_model(transformer.transformer_tiny())

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    return model, features 
Example #2
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def _create_greedy_infer_model(self):
    """Creates model for greedy inference testing.

    Returns:
      model: A t2t model.
      features: A map of string to tensor.
    """
    model, features = get_model(transformer.transformer_tiny())

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    return model, features 
Example #3
Source File: universal_transformer.py    From BERT with Apache License 2.0
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #4
Source File: universal_transformer_modified.py    From Graph-Transformer with Apache License 2.0
def universal_transformer_tiny1():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #5
Source File: score2perf_hparams.py    From magenta with Apache License 2.0
def t_rel_len2048_dropout15_tiny():
  """Hparams for LM with relative attention, tiny transformer."""
  # hparams = transformer.transformer_base()
  hparams = transformer.transformer_tiny()
  update_transformer_hparams_for_music(hparams)
  update_truncate_length(hparams, 2048)
  update_dropout(hparams, 0.15)
  hparams.self_attention_type = "dot_product_relative_v2"
  # Need to specify num_hidden_layers
  hparams.attention_key_channels = 512
  hparams.num_hidden_layers = 8
  return hparams 
Example #6
Source File: transformer_test.py    From training_results_v0.5 with Apache License 2.0
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE,
                                                   VOCAB_SIZE,
                                                   hparams)
  if not has_input:
    del p_hparams.modality["inputs"]
  hparams.problem_hparams = p_hparams

  inputs = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features 
Example #7
Source File: universal_transformer.py    From training_results_v0.5 with Apache License 2.0
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #8
Source File: transformer_aux.py    From training_results_v0.5 with Apache License 2.0
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams 
Example #9
Source File: transformer_test.py    From BERT with Apache License 2.0
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE,
                                                   VOCAB_SIZE,
                                                   hparams)
  if not has_input:
    del p_hparams.modality["inputs"]
  hparams.problem_hparams = p_hparams

  inputs = np.random.randint(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = np.random.randint(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features 
Example #10
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def testBeamVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      beam_result = model._beam_decode_slow(
          features,
          decode_length,
          beam_size=4,
          top_beams=1,
          alpha=1.0)["outputs"]

      fast_result = model._beam_decode(
          features,
          decode_length,
          beam_size=4,
          top_beams=1,
          alpha=1.0)["outputs"]

    with self.test_session():
      beam_res = beam_result.eval()
      fast_res = fast_result.eval()

    self.assertAllClose(beam_res, fast_res) 
Example #11
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def testSlowVsFastNoInput(self):
    model, features = get_model(
        transformer.transformer_tiny(), has_input=False)

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      slow_result = model._slow_greedy_infer(
          features, decode_length)["outputs"]
      slow_result = tf.squeeze(slow_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      slow_res = slow_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
    self.assertAllClose(slow_res, fast_res) 
Example #12
Source File: evolved_transformer_test.py    From BERT with Apache License 2.0
def testSlowVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      greedy_result = model._slow_greedy_infer(
          features, decode_length)["outputs"]
      greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      greedy_res = greedy_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length))
    self.assertAllClose(greedy_res, fast_res) 
Example #13
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_tiny_bs3():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 3)
  return hparams 
Example #14
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_tiny_bs2():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 2)
  return hparams 
Example #15
Source File: transformer_parallel.py    From BERT with Apache License 2.0
def transformer_tiny_bs1():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 1)
  return hparams 
Example #16
Source File: universal_transformer.py    From fine-lm with MIT License
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #17
Source File: transformer_aux.py    From BERT with Apache License 2.0
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams 
Example #18
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testBeamVsFast(self):
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      beam_result = model._beam_decode_slow(
          features, decode_length, beam_size=4, top_beams=1,
          alpha=1.0)["outputs"]

      fast_result = model._beam_decode(
          features, decode_length, beam_size=4, top_beams=1,
          alpha=1.0)["outputs"]

    with self.test_session():
      beam_res = beam_result.eval()
      fast_res = fast_result.eval()

    self.assertAllClose(beam_res, fast_res) 
Example #19
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFastNoInput(self):
    model, features = get_model(transformer.transformer_tiny(), has_input=False)

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      slow_result = model._slow_greedy_infer(features, decode_length)["outputs"]
      slow_result = tf.squeeze(slow_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      slow_res = slow_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
    self.assertAllClose(slow_res, fast_res) 
Example #20
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testSlowVsFast(self):
    tf.set_random_seed(1234)
    model, features = get_model(transformer.transformer_tiny())

    decode_length = DECODE_LENGTH

    out_logits, _ = model(features)
    out_logits = tf.squeeze(out_logits, axis=[2, 3])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
        labels=tf.reshape(features["targets"], [-1]))
    loss = tf.reduce_mean(loss)
    apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

    with self.test_session():
      tf.global_variables_initializer().run()
      for _ in range(10):
        apply_grad.run()

    model.set_mode(tf.estimator.ModeKeys.PREDICT)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      greedy_result = model._slow_greedy_infer(features,
                                               decode_length)["outputs"]
      greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

      fast_result = model._greedy_infer(features, decode_length)["outputs"]

    with self.test_session():
      greedy_res = greedy_result.eval()
      fast_res = fast_result.eval()

    self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length))
    self.assertAllClose(greedy_res, fast_res) 
Example #21
Source File: evolved_transformer_test.py    From tensor2tensor with Apache License 2.0
def testEvolvedTransformer(self):
    model, features = get_model(hparams=transformer.transformer_tiny())
    logits, _ = model(features)
    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) 
Example #22
Source File: transformer_parallel.py    From tensor2tensor with Apache License 2.0
def transformer_tiny_bs3():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 3)
  return hparams 
Example #23
Source File: transformer_parallel.py    From tensor2tensor with Apache License 2.0
def transformer_tiny_bs2():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 2)
  return hparams 
Example #24
Source File: transformer_parallel.py    From tensor2tensor with Apache License 2.0
def transformer_tiny_bs1():
  hparams = transformer.transformer_tiny()
  hparams.add_hparam("block_size", 1)
  return hparams 
Example #25
Source File: universal_transformer.py    From tensor2tensor with Apache License 2.0
def universal_transformer_tiny():
  hparams = transformer.transformer_tiny()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_rec_steps = 8
  return hparams 
Example #26
Source File: transformer_aux.py    From tensor2tensor with Apache License 2.0
def transformer_aux_tiny():
  """Set of hyperparameters."""
  hparams = transformer.transformer_tiny()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2")
  return hparams 
Example #27
Source File: transformer_test.py    From fine-lm with MIT License
def get_model(hparams=None, mode=tf.estimator.ModeKeys.TRAIN,
              has_input=True, model_cls=transformer.Transformer):
  if hparams is None:
    hparams = transformer.transformer_tiny()
  hparams.hidden_size = 8
  hparams.filter_size = 32
  hparams.num_heads = 1
  hparams.layer_prepostprocess_dropout = 0.0

  p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE, VOCAB_SIZE)
  if not has_input:
    p_hparams.input_modality = {}
  hparams.problem_hparams = p_hparams

  inputs = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
  targets = -1 + np.random.random_integers(
      VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
  features = {
      "targets": tf.constant(targets, dtype=tf.int32, name="targets"),
      "target_space_id": tf.constant(1, dtype=tf.int32)
  }
  if has_input:
    features["inputs"] = tf.constant(inputs, dtype=tf.int32, name="inputs")

  return model_cls(hparams, mode, p_hparams), features