Python tensorflow.compat.v1.constant() Examples

The following are 30 code examples of tensorflow.compat.v1.constant(), drawn from open-source projects; the source project and file are listed above each example. You may also want to look at the other functions and classes available in the tensorflow.compat.v1 module.
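Before the project examples, here is a minimal sketch of the call itself. It assumes nothing beyond TensorFlow's v1 compatibility API; the variable names and values are illustrative and do not come from any of the projects below.

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # the examples on this page assume TF1-style graph execution

# tf.constant builds an immutable tensor from a Python value.
scalar = tf.constant(1.0)                     # 0-d float32, the usual metric weight below
go_ids = tf.constant([0, 0], dtype=tf.int32)  # 1-D int32, like the GO ids in the beam-search test
filled = tf.constant(7.0, shape=[2, 3])       # scalar value broadcast to the requested shape

with tf.Session() as sess:
  print(sess.run([scalar, go_ids, filled]))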
Example #1
Source File: transformer.py    From tensor2tensor with Apache License 2.0
def transformer_tall_pretrain_lm():
  """Hparams for transformer on LM pretraining (with 64k vocab)."""
  hparams = transformer_tall()
  hparams.learning_rate_constant = 2e-4
  hparams.learning_rate_schedule = ("linear_warmup*constant*cosdecay")
  hparams.optimizer = "adam_w"
  hparams.weight_decay = 0.01 * hparams.learning_rate_constant
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.999
  hparams.optimizer_adam_epsilon = 1e-8
  # Set max examples to something big when pretraining only the LM, definitely
  # something an order of magnitude bigger than number of train steps.
  hparams.multiproblem_schedule_max_examples = 5e8
  # Set train steps to learning_rate_decay_steps or less
  hparams.learning_rate_decay_steps = 5000000
  return hparams 
Example #2
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def two_class_log_likelihood(predictions, labels, weights_fn=None):
  """Log-likelihood for two class classification with 0/1 labels.

  Args:
    predictions: A float valued tensor of shape [`batch_size`].  Each
      component should be between 0 and 1.
    labels: An int valued tensor of shape [`batch_size`].  Each component
      should either be 0 or 1.
    weights_fn: unused.

  Returns:
    A pair, with the average log likelihood in the first component.
  """
  del weights_fn
  float_predictions = tf.cast(tf.squeeze(predictions), dtype=tf.float64)
  batch_probs = tf.stack([1. - float_predictions, float_predictions], axis=-1)
  int_labels = tf.cast(tf.squeeze(labels), dtype=tf.int32)
  onehot_targets = tf.cast(tf.one_hot(int_labels, 2), dtype=tf.float64)
  chosen_probs = tf.einsum(
      "ij,ij->i", batch_probs, onehot_targets, name="chosen_probs")
  avg_log_likelihood = tf.reduce_mean(tf.log(chosen_probs))
  return avg_log_likelihood, tf.constant(1.0) 
Example #3
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testMultilabelMatch3(self):
    predictions = np.random.randint(1, 5, size=(100, 1, 1, 1))
    targets = np.random.randint(1, 5, size=(100, 10, 1, 1))
    weights = np.random.randint(0, 2, size=(100, 1, 1, 1))
    targets *= weights

    predictions_repeat = np.repeat(predictions, 10, axis=1)
    expected = (predictions_repeat == targets).astype(float)
    expected = np.sum(expected, axis=(1, 2, 3))
    expected = np.minimum(expected / 3.0, 1.)
    expected = np.sum(expected * weights[:, 0, 0, 0]) / weights.shape[0]
    with self.test_session() as session:
      scores, weights_ = metrics.multilabel_accuracy_match3(
          tf.one_hot(predictions, depth=5, dtype=tf.float32),
          tf.constant(targets, dtype=tf.int32))
      a, a_op = tf.metrics.mean(scores, weights_)
      session.run(tf.local_variables_initializer())
      session.run(tf.global_variables_initializer())
      _ = session.run(a_op)
      actual = session.run(a)
    self.assertAlmostEqual(actual, expected, places=6) 
Example #4
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def sigmoid_accuracy_one_hot(logits, labels, weights_fn=None):
  """Calculate accuracy for a set, given one-hot labels and logits.

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)
  Returns:
    accuracy (scalar), weights
  """
  with tf.variable_scope("sigmoid_accuracy_one_hot", values=[logits, labels]):
    del weights_fn
    predictions = tf.nn.sigmoid(logits)
    labels = tf.argmax(labels, -1)
    predictions = tf.argmax(predictions, -1)
    _, accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)
    return accuracy, tf.constant(1.0) 
Example #5
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def sigmoid_accuracy(logits, labels, weights_fn=None):
  """Calculate accuracy for a set, given integer labels and logits.

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1]
    weights_fn: Function that takes in labels and weighs examples (unused)
  Returns:
    accuracy (scalar), weights
  """
  with tf.variable_scope("sigmoid_accuracy", values=[logits, labels]):
    del weights_fn
    predictions = tf.nn.sigmoid(logits)
    predictions = tf.argmax(predictions, -1)
    _, accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)
    return accuracy, tf.constant(1.0) 
Example #6
Source File: beam_search_test.py    From tensor2tensor with Apache License 2.0
def testShapes(self):
    batch_size = 2
    beam_size = 3
    vocab_size = 4
    decode_length = 10

    initial_ids = tf.constant([0, 0])  # GO

    def symbols_to_logits(_):
      # Just return random logits
      return tf.random_uniform((batch_size * beam_size, vocab_size))

    final_ids, final_probs, _ = beam_search.beam_search(
        symbols_to_logits, initial_ids, beam_size, decode_length, vocab_size,
        0.)

    self.assertEqual(final_ids.get_shape().as_list(), [None, beam_size, None])

    self.assertEqual(final_probs.get_shape().as_list(), [batch_size, beam_size]) 
Example #7
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def sigmoid_precision_one_hot(logits, labels, weights_fn=None):
  """Calculate precision for a set, given one-hot labels and logits.

  Predictions are converted to one-hot,
  as predictions[example][arg-max(example)] = 1

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)
  Returns:
    precision (scalar), weights
  """
  with tf.variable_scope("sigmoid_precision_one_hot", values=[logits, labels]):
    del weights_fn
    num_classes = logits.shape[-1]
    predictions = tf.nn.sigmoid(logits)
    predictions = tf.argmax(predictions, -1)
    predictions = tf.one_hot(predictions, num_classes)
    _, precision = tf.metrics.precision(labels=labels, predictions=predictions)
    return precision, tf.constant(1.0) 
Example #8
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def sigmoid_recall_one_hot(logits, labels, weights_fn=None):
  """Calculate recall for a set, given one-hot labels and logits.

  Predictions are converted to one-hot,
  as predictions[example][arg-max(example)] = 1

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)
  Returns:
    recall (scalar), weights
  """
  with tf.variable_scope("sigmoid_recall_one_hot", values=[logits, labels]):
    del weights_fn
    num_classes = logits.shape[-1]
    predictions = tf.nn.sigmoid(logits)
    predictions = tf.argmax(predictions, -1)
    predictions = tf.one_hot(predictions, num_classes)
    _, recall = tf.metrics.recall(labels=labels, predictions=predictions)
    return recall, tf.constant(1.0) 
Example #9
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def sigmoid_cross_entropy_one_hot(logits, labels, weights_fn=None):
  """Calculate sigmoid cross entropy for one-hot lanels and logits.

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)
  Returns:
    cross_entropy (scalar), weights
  """
  with tf.variable_scope("sigmoid_cross_entropy_one_hot",
                         values=[logits, labels]):
    del weights_fn
    cross_entropy = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=labels, logits=logits)
    return cross_entropy, tf.constant(1.0) 
Example #10
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testNegativeLogPerplexityMaskedAssert(self):
    predictions = np.random.randint(4, size=(12, 12, 12, 1))
    targets = np.random.randint(4, size=(12, 12, 12, 1))
    features = {}

    with self.assertRaisesRegexp(
        ValueError,
        'masked_neg_log_perplexity requires targets_mask feature'):
      with self.test_session() as session:
        scores, _ = metrics.padded_neg_log_perplexity_with_masking(
            tf.one_hot(predictions, depth=4, dtype=tf.float32),
            tf.constant(targets, dtype=tf.int32),
            features)
        a = tf.reduce_mean(scores)
        session.run(tf.global_variables_initializer())
        _ = session.run(a) 
Example #11
Source File: metrics.py    From tensor2tensor with Apache License 2.0
def pearson_correlation_coefficient(predictions, labels, weights_fn=None):
  """Calculate pearson correlation coefficient.

  Args:
    predictions: The raw predictions.
    labels: The actual labels.
    weights_fn: Weighting function.

  Returns:
    The Pearson correlation coefficient.
  """
  del weights_fn
  _, pearson = contrib.metrics().streaming_pearson_correlation(
      predictions, labels)
  return pearson, tf.constant(1.0)

# Metrics are functions that take predictions and labels and return
# a tensor of metrics and a tensor of weights.
# If the function has "features" as an argument, it will receive the whole
# features dict as well.
# The results are passed to tf.metrics.mean to accumulate properly. 
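The comment above states the convention shared by every metric in this file: return a pair (values, weights) and let the eval loop pass it to tf.metrics.mean. Below is a minimal sketch of that wiring, assuming tensor2tensor's metrics module is importable as tensor2tensor.utils.metrics; the input tensors are illustrative.

import tensorflow.compat.v1 as tf
from tensor2tensor.utils import metrics

tf.disable_v2_behavior()

predictions = tf.constant([0.9, 0.2, 0.7], dtype=tf.float32)  # per-example probabilities
labels = tf.constant([1, 0, 0], dtype=tf.int32)               # 0/1 targets

# Each metric returns (values, weights); tf.constant(1.0) is the typical weight.
values, weights = metrics.two_class_log_likelihood(predictions, labels)
mean_metric, update_op = tf.metrics.mean(values, weights)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  sess.run(update_op)
  print(sess.run(mean_metric))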
Example #12
Source File: rouge.py    From tensor2tensor with Apache License 2.0
def rouge_l_fscore(predictions, labels, **unused_kwargs):
  """ROUGE scores computation between labels and predictions.

  This is an approximate ROUGE scoring method since we do not glue word pieces
  or decode the ids and tokenize the output.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    rouge_l_fscore: approx rouge-l f1 score.
  """
  outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Convert the outputs and labels to a [batch_size, input_length] tensor.
  outputs = tf.squeeze(outputs, axis=[-1, -2])
  labels = tf.squeeze(labels, axis=[-1, -2])
  rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (outputs, labels),
                               tf.float32)
  return rouge_l_f_score, tf.constant(1.0) 
Example #13
Source File: learning_rate.py    From tensor2tensor with Apache License 2.0
def _learning_rate_warmup(warmup_steps, warmup_schedule="exp", hparams=None):
  """Learning rate warmup multiplier."""
  if not warmup_steps:
    return tf.constant(1.)

  tf.logging.info("Applying %s learning rate warmup for %d steps",
                  warmup_schedule, warmup_steps)

  warmup_steps = tf.to_float(warmup_steps)
  global_step = _global_step(hparams)

  if warmup_schedule == "exp":
    return tf.exp(tf.log(0.01) / warmup_steps)**(warmup_steps - global_step)
  else:
    assert warmup_schedule == "linear"
    start = tf.constant(0.35)
    return ((tf.constant(1.) - start) / warmup_steps) * global_step + start 
Example #14
Source File: rouge_test.py    From tensor2tensor with Apache License 2.0
def testRougeLMetricE2E(self):
    vocab_size = 4
    batch_size = 12
    seq_length = 12
    predictions = tf.one_hot(
        np.random.randint(vocab_size, size=(batch_size, seq_length, 1, 1)),
        depth=4,
        dtype=tf.float32)
    targets = np.random.randint(4, size=(12, 12, 1, 1))
    with self.test_session() as session:
      scores, _ = rouge.rouge_l_fscore(
          predictions,
          tf.constant(targets, dtype=tf.int32))
      a = tf.reduce_mean(scores)
      session.run(tf.global_variables_initializer())
      session.run(a) 
Example #15
Source File: ssd_dataloader.py    From benchmarks with Apache License 2.0
def normalize_image(images):
  """Normalize image to zero mean and unit variance.

  Args:
    images: a tensor representing images, at least 3-D.
  Returns:
    images normalized by mean and stdev.
  """
  data_type = images.dtype
  mean = tf.constant(ssd_constants.NORMALIZATION_MEAN, data_type)
  std = tf.constant(ssd_constants.NORMALIZATION_STD, data_type)
  images = tf.divide(tf.subtract(images, mean), std)

  mlperf.logger.log(key=mlperf.tags.DATA_NORMALIZATION_MEAN,
                    value=ssd_constants.NORMALIZATION_MEAN)
  mlperf.logger.log(key=mlperf.tags.DATA_NORMALIZATION_STD,
                    value=ssd_constants.NORMALIZATION_STD)
  return images 
Example #16
Source File: transformer.py    From tensor2tensor with Apache License 2.0
def transformer_tall_train_tied():
  """Tied means train CNN/DM summarization as LM."""
  hparams = transformer_tall()
  hparams.multiproblem_max_input_length = 750
  hparams.multiproblem_max_target_length = 100
  hparams.multiproblem_schedule_max_examples = 0
  hparams.learning_rate_schedule = ("linear_warmup*constant*cosdecay")
  hparams.learning_rate_constant = 2e-4
  hparams.learning_rate_warmup_steps = 8000
  # Set train steps to learning_rate_decay_steps or less
  hparams.learning_rate_decay_steps = 150000
  hparams.multiproblem_target_eval_only = True
  hparams.multiproblem_reweight_label_loss = True
  hparams.multiproblem_label_weight = 1.0
  hparams.optimizer = "true_adam"
  return hparams 
Example #17
Source File: variable_mgr_util_test.py    From benchmarks with Apache License 2.0
def testAppendGradientsWithLossScaleWithtNan(self):
    v = tf.Variable(0)
    training_ops = []
    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=True,
        loss_scale=tf.Variable(4, dtype=tf.float32),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=True)
    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops,
        get_apply_gradients_ops_func,
        loss_scale_params,
        grad_has_inf_nan=tf.constant(True))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(training_ops)
      self.assertEqual(sess.run(v), 0)  # Skip updating for v.
      # Halve loss_scale and reset loss_scale_normal_steps.
      self.assertEqual(sess.run(loss_scale_params.loss_scale), 2)
      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0) 
Example #18
Source File: transformer.py    From tensor2tensor with Apache License 2.0
def transformer_tall_finetune_tied():
  """Tied means fine-tune CNN/DM summarization as LM."""
  hparams = transformer_tall()
  hparams.multiproblem_max_input_length = 750
  hparams.multiproblem_max_target_length = 100
  hparams.multiproblem_schedule_max_examples = 0
  hparams.learning_rate_schedule = ("linear_warmup*constant*cosdecay")
  hparams.learning_rate_constant = 5e-5
  hparams.learning_rate_warmup_steps = 100
  # Set train steps to learning_rate_decay_steps or less
  hparams.learning_rate_decay_steps = 80000
  hparams.multiproblem_target_eval_only = True
  hparams.multiproblem_reweight_label_loss = True
  hparams.multiproblem_label_weight = 1.0
  hparams.optimizer = "true_adam"
  return hparams 
Example #19
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testPrefixAccuracy(self):
    vocab_size = 10
    predictions = tf.one_hot(
        tf.constant([[[1], [2], [3], [4], [9], [6], [7], [8]],
                     [[1], [2], [3], [4], [5], [9], [7], [8]],
                     [[1], [2], [3], [4], [5], [9], [7], [0]]]),
        vocab_size)
    labels = tf.expand_dims(
        tf.constant([[[1], [2], [3], [4], [5], [6], [7], [8]],
                     [[1], [2], [3], [4], [5], [6], [7], [8]],
                     [[1], [2], [3], [4], [5], [6], [7], [0]]]),
        axis=-1)
    expected_accuracy = np.average([4.0 / 8.0,
                                    5.0 / 8.0,
                                    5.0 / 7.0])
    accuracy, _ = metrics.prefix_accuracy(predictions, labels)
    with self.test_session() as session:
      accuracy_value = session.run(accuracy)
      self.assertAlmostEqual(expected_accuracy, accuracy_value) 
Example #20
Source File: bleu_hook.py    From tensor2tensor with Apache License 2.0
def bleu_score(predictions, labels, **unused_kwargs):
  """BLEU score computation between labels and predictions.

  An approximate BLEU scoring method since we do not glue word pieces or
  decode the ids and tokenize the output. By default, we use ngram order of 4
  and use brevity penalty. Also, this does not have beam search.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    bleu: float, approx bleu score
  """
  outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Convert the outputs and labels to a [batch_size, input_length] tensor.
  outputs = tf.squeeze(outputs, axis=[-1, -2])
  labels = tf.squeeze(labels, axis=[-1, -2])

  bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32)
  return bleu, tf.constant(1.0) 
Example #21
Source File: rouge.py    From tensor2tensor with Apache License 2.0
def rouge_2_fscore(predictions, labels, **unused_kwargs):
  """ROUGE-2 F1 score computation between labels and predictions.

  This is an approximate ROUGE scoring method since we do not glue word pieces
  or decode the ids and tokenize the output.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    rouge2_fscore: approx rouge-2 f1 score.
  """

  outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Convert the outputs and labels to a [batch_size, input_length] tensor.
  outputs = tf.squeeze(outputs, axis=[-1, -2])
  labels = tf.squeeze(labels, axis=[-1, -2])
  rouge_2_f_score = tf.py_func(rouge_n, (outputs, labels), tf.float32)
  return rouge_2_f_score, tf.constant(1.0) 
Example #22
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testNegativeLogPerplexity(self):
    predictions = np.random.randint(4, size=(12, 12, 12, 1))
    targets = np.random.randint(4, size=(12, 12, 12, 1))
    with self.test_session() as session:
      scores, _ = metrics.padded_neg_log_perplexity(
          tf.one_hot(predictions, depth=4, dtype=tf.float32),
          tf.constant(targets, dtype=tf.int32))
      a = tf.reduce_mean(scores)
      session.run(tf.global_variables_initializer())
      actual = session.run(a)
    self.assertEqual(actual.shape, ()) 
Example #23
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testNegativeLogPerplexityMasked(self):
    predictions = np.random.randint(4, size=(12, 12, 12, 1))
    targets = np.random.randint(4, size=(12, 12, 12, 1))
    features = {
        'targets_mask': tf.to_float(tf.ones([12, 12]))
    }
    with self.test_session() as session:
      scores, _ = metrics.padded_neg_log_perplexity_with_masking(
          tf.one_hot(predictions, depth=4, dtype=tf.float32),
          tf.constant(targets, dtype=tf.int32),
          features)
      a = tf.reduce_mean(scores)
      session.run(tf.global_variables_initializer())
      actual = session.run(a)
    self.assertEqual(actual.shape, ()) 
Example #24
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testTwoClassAccuracyMetric(self):
    predictions = tf.constant([0.0, 0.2, 0.4, 0.6, 0.8, 1.0], dtype=tf.float32)
    targets = tf.constant([0, 0, 1, 0, 1, 1], dtype=tf.int32)
    expected = 2.0 / 3.0
    with self.test_session() as session:
      accuracy, _ = metrics.two_class_accuracy(predictions, targets)
      session.run(tf.global_variables_initializer())
      session.run(tf.local_variables_initializer())
      actual = session.run(accuracy)
    self.assertAlmostEqual(actual, expected) 
Example #25
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testUnpaddedRMSEMetric(self):
    predictions = np.full((10, 1), 1)  # All 1's
    targets = np.full((10, 1), 3)  # All 3's
    expected = np.mean((predictions - targets)**2)  # MSE = 4.0
    with self.test_session() as session:
      mse, _ = metrics.unpadded_mse(
          tf.constant(predictions, dtype=tf.int32),
          tf.constant(targets, dtype=tf.int32))
      session.run(tf.global_variables_initializer())
      actual = session.run(mse)
    self.assertEqual(actual, expected) 
Example #26
Source File: metrics_test.py    From tensor2tensor with Apache License 2.0
def testPearsonCorrelationCoefficient(self):
    predictions = np.random.rand(12, 1)
    targets = np.random.rand(12, 1)

    expected = np.corrcoef(np.squeeze(predictions), np.squeeze(targets))[0][1]
    with self.test_session() as session:
      pearson, _ = metrics.pearson_correlation_coefficient(
          tf.constant(predictions, dtype=tf.float32),
          tf.constant(targets, dtype=tf.float32))
      session.run(tf.global_variables_initializer())
      session.run(tf.local_variables_initializer())
      actual = session.run(pearson)
    self.assertAlmostEqual(actual, expected) 
Example #27
Source File: beam_search.py    From tensor2tensor with Apache License 2.0
def _create_topk_unique(inputs, k):
  """Creates the top k values in sorted order with indices.

  Args:
    inputs: A tensor with rank of 2. [batch_size, original_size].
    k: An integer, number of top elements to select.

  Returns:
    topk_r2: A tensor, the k largest elements. [batch_size, k].
    topk_indices_r2: A tensor, indices of the top k values. [batch_size, k].
  """
  height = inputs.shape[0]
  width = inputs.shape[1]
  neg_inf_r0 = tf.constant(-np.inf, dtype=tf.float32)
  ones = tf.ones([height, width], dtype=tf.float32)
  neg_inf_r2 = ones * neg_inf_r0
  inputs = tf.where(tf.is_nan(inputs), neg_inf_r2, inputs)

  # Select the current largest value k times and keep them in topk_r2. The
  # selected largest values are marked as the smallest value to avoid being
  # selected again.
  tmp = inputs
  topk_r2 = tf.zeros([height, k], dtype=tf.float32)
  for i in range(k):
    kth_order_statistic = tf.reduce_max(tmp, axis=1, keepdims=True)
    k_mask = tf.tile(tf.expand_dims(tf.equal(tf.range(k), tf.fill([k], i)), 0),
                     [height, 1])
    topk_r2 = tf.where(k_mask, tf.tile(kth_order_statistic, [1, k]), topk_r2)
    ge_r2 = tf.greater_equal(inputs, tf.tile(kth_order_statistic, [1, width]))
    tmp = tf.where(ge_r2, neg_inf_r2, inputs)

  log2_ceiling = int(math.ceil(math.log(float(int(width)), 2)))
  next_power_of_two = 1 << log2_ceiling
  count_mask = next_power_of_two - 1
  mask_r0 = tf.constant(count_mask)
  mask_r2 = tf.fill([height, k], mask_r0)
  topk_r2_s32 = tf.bitcast(topk_r2, tf.int32)
  topk_indices_r2 = tf.bitwise.bitwise_and(topk_r2_s32, mask_r2)
  return topk_r2, topk_indices_r2 
Example #28
Source File: decoding.py    From tensor2tensor with Apache License 2.0
def _decode_input_tensor_to_features_dict(feature_map, hparams, decode_hp):
  """Convert the interactive input format (see above) to a dictionary.

  Args:
    feature_map: dict with inputs.
    hparams: model hyperparameters
    decode_hp: decode hyperparameters

  Returns:
    a features dictionary, as expected by the decoder.
  """
  inputs = tf.convert_to_tensor(feature_map["inputs"])
  input_is_image = False

  x = inputs
  p_hparams = hparams.problem_hparams
  # Add a third empty dimension
  x = tf.expand_dims(x, axis=[2])
  x = tf.to_int32(x)
  input_space_id = tf.constant(p_hparams.input_space_id)
  target_space_id = tf.constant(p_hparams.target_space_id)

  features = {}
  features["input_space_id"] = input_space_id
  features["target_space_id"] = target_space_id
  features["decode_length"] = (
      IMAGE_DECODE_LENGTH if input_is_image else
      tf.constant(decode_hp.extra_length))
  features["inputs"] = x
  # Save inputs to "partial_targets" when prepending inputs to targets. Also
  # keep "inputs" as some models crash if they don't exist.
  if getattr(hparams, "prepend_mode", "none") != "none":
    shape = tf.shape(x)
    partial_targets = tf.reshape(x, [shape[0], shape[1]])
    partial_targets = tf.pad(partial_targets, [[0, 0], [0, 1]])
    features["partial_targets"] = partial_targets
  return features 
Example #29
Source File: neural_gpu.py    From tensor2tensor with Apache License 2.0
def diagonal_neural_gpu(inputs, hparams, name=None):
  """Improved Neural GPU as in https://arxiv.org/abs/1702.08727."""
  with tf.variable_scope(name, "diagonal_neural_gpu"):

    def step(state_tup, inp):
      """Single step of the improved Neural GPU."""
      state, _ = state_tup
      x = state
      for layer in range(hparams.num_hidden_layers):
        x, new_loss = common_layers.diagonal_conv_gru(
            x, (hparams.kernel_height, hparams.kernel_width),
            hparams.hidden_size,
            dropout=hparams.dropout,
            name="dcgru_%d" % layer)
      # Padding input is zeroed out in the modality; we check this by summing.
      padding_inp = tf.less(tf.reduce_sum(tf.abs(inp), axis=[1, 2]), 0.00001)
      new_state = tf.where(padding_inp, state, x)  # No-op where inp is padding.
      return new_state, new_loss

    final_state, losses = tf.scan(
        step,
        tf.transpose(inputs, [1, 0, 2, 3]),
        initializer=(inputs, tf.constant(0.0)),
        parallel_iterations=1,
        swap_memory=True)
    return final_state[0, :, :, :, :], 2.0 * tf.reduce_mean(losses) 
Example #30
Source File: learning_rate.py    From tensor2tensor with Apache License 2.0
def learning_rate_schedule(hparams):
  """Learning rate schedule based on hparams."""
  mlperf_log.transformer_print(key=mlperf_log.OPT_LR, deferred=True)
  mlperf_log.transformer_print(
      key=mlperf_log.OPT_LR_WARMUP_STEPS,
      value=hparams.learning_rate_warmup_steps)
  step_num = _global_step(hparams)
  schedule_string = hparams.learning_rate_schedule
  names = schedule_string.split("*")
  names = [name.strip() for name in names if name.strip()]
  ret = tf.constant(1.0)
  for name in names:
    ret *= learning_rate_factor(name, step_num, hparams)
  return ret