Python tensorflow.compat.v1.constant() Examples
The following are 30 code examples of tensorflow.compat.v1.constant().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
Example #1
Source File: transformer.py From tensor2tensor with Apache License 2.0 | 6 votes |
def transformer_tall_pretrain_lm():
  """Build hparams for LM-only pretraining of the tall transformer (64k vocab).

  Starts from `transformer_tall()` and overrides the learning-rate schedule,
  the optimizer settings and the multiproblem example cap.

  Returns:
    The modified hparams object.
  """
  hparams = transformer_tall()

  # Learning-rate schedule: warmup, constant factor, then cosine decay.
  hparams.learning_rate_schedule = "linear_warmup*constant*cosdecay"
  hparams.learning_rate_constant = 2e-4
  # Train steps should be set to learning_rate_decay_steps or less.
  hparams.learning_rate_decay_steps = 5000000

  # Optimizer; weight decay is expressed relative to the base learning rate.
  hparams.optimizer = "adam_w"
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.999
  hparams.optimizer_adam_epsilon = 1e-8
  hparams.weight_decay = 0.01 * hparams.learning_rate_constant

  # When pretraining only the LM, max examples must be something big:
  # definitely an order of magnitude bigger than the number of train steps.
  hparams.multiproblem_schedule_max_examples = 5e8
  return hparams
Example #2
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def two_class_log_likelihood(predictions, labels, weights_fn=None):
  """Average log-likelihood of 0/1 labels under predicted probabilities.

  Args:
    predictions: A float valued tensor of shape [`batch_size`]; each component
      should lie between 0 and 1 (probability of class 1).
    labels: An int valued tensor of shape [`batch_size`]; each component
      should be either 0 or 1.
    weights_fn: unused.

  Returns:
    A pair `(avg_log_likelihood, weight)` where the weight is the constant 1.0.
  """
  del weights_fn  # Accepted only for signature compatibility.

  # Column 0 holds P(label == 0), column 1 holds P(label == 1).
  prob_of_one = tf.cast(tf.squeeze(predictions), dtype=tf.float64)
  class_probs = tf.stack([1. - prob_of_one, prob_of_one], axis=-1)

  label_ids = tf.cast(tf.squeeze(labels), dtype=tf.int32)
  label_one_hot = tf.cast(tf.one_hot(label_ids, 2), dtype=tf.float64)

  # Row-wise dot product selects the probability of the labelled class.
  picked_probs = tf.einsum(
      "ij,ij->i", class_probs, label_one_hot, name="chosen_probs")
  mean_log_likelihood = tf.reduce_mean(tf.log(picked_probs))
  return mean_log_likelihood, tf.constant(1.0)
Example #3
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testMultilabelMatch3(self):
  # Random class ids in [1, 5); each example has 10 target slots and a 0/1
  # weight. Multiplying in place zeroes all targets of zero-weight examples.
  predictions = np.random.randint(1, 5, size=(100, 1, 1, 1))
  targets = np.random.randint(1, 5, size=(100, 10, 1, 1))
  weights = np.random.randint(0, 2, size=(100, 1, 1, 1))
  targets *= weights

  # NumPy reference: count matches per example, cap count / 3 at 1, then take
  # the weighted sum divided by the number of examples.
  tiled_predictions = np.repeat(predictions, 10, axis=1)
  match_flags = (tiled_predictions == targets).astype(float)
  match_counts = np.sum(match_flags, axis=(1, 2, 3))
  capped_scores = np.minimum(match_counts / 3.0, 1.)
  expected = np.sum(capped_scores * weights[:, 0, 0, 0]) / weights.shape[0]

  with self.test_session() as session:
    scores, metric_weights = metrics.multilabel_accuracy_match3(
        tf.one_hot(predictions, depth=5, dtype=tf.float32),
        tf.constant(targets, dtype=tf.int32))
    mean_value, mean_update = tf.metrics.mean(scores, metric_weights)
    session.run(tf.local_variables_initializer())
    session.run(tf.global_variables_initializer())
    # Run the update op once before reading the streaming mean.
    _ = session.run(mean_update)
    actual = session.run(mean_value)
  self.assertAlmostEqual(actual, expected, places=6)
Example #4
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def sigmoid_accuracy_one_hot(logits, labels, weights_fn=None):
  """Accuracy of arg-max sigmoid predictions against one-hot labels.

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)

  Returns:
    accuracy (scalar), weights
  """
  with tf.variable_scope("sigmoid_accuracy_one_hot", values=[logits, labels]):
    del weights_fn  # Accepted only for signature compatibility.
    probabilities = tf.nn.sigmoid(logits)
    # Collapse the class axis of both tensors to class indices.
    label_ids = tf.argmax(labels, -1)
    predicted_ids = tf.argmax(probabilities, -1)
    # tf.metrics.accuracy returns (value, update_op); the update op is used.
    accuracy = tf.metrics.accuracy(
        labels=label_ids, predictions=predicted_ids)[1]
    return accuracy, tf.constant(1.0)
Example #5
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def sigmoid_accuracy(logits, labels, weights_fn=None):
  """Accuracy of arg-max sigmoid predictions against integer labels.

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1]
    weights_fn: Function that takes in labels and weighs examples (unused)

  Returns:
    accuracy (scalar), weights
  """
  with tf.variable_scope("sigmoid_accuracy", values=[logits, labels]):
    del weights_fn  # Accepted only for signature compatibility.
    # Class index with the highest sigmoid activation per example.
    predicted_ids = tf.argmax(tf.nn.sigmoid(logits), -1)
    # tf.metrics.accuracy returns (value, update_op); the update op is used.
    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=predicted_ids)[1]
    return accuracy, tf.constant(1.0)
Example #6
Source File: beam_search_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testShapes(self):
  # Checks only the static shapes reported for the beam search outputs.
  batch_size = 2
  beam_size = 3
  vocab_size = 4
  decode_length = 10

  initial_ids = tf.constant([0, 0])  # GO

  def symbols_to_logits(_):
    # Just return random logits, one row per (batch, beam) pair.
    return tf.random_uniform((batch_size * beam_size, vocab_size))

  final_ids, final_probs, _ = beam_search.beam_search(
      symbols_to_logits,
      initial_ids,
      beam_size,
      decode_length,
      vocab_size,
      0.)

  ids_shape = final_ids.get_shape().as_list()
  self.assertEqual(ids_shape, [None, beam_size, None])

  probs_shape = final_probs.get_shape().as_list()
  self.assertEqual(probs_shape, [batch_size, beam_size])
Example #7
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def sigmoid_precision_one_hot(logits, labels, weights_fn=None):
  """Precision of one-hot arg-max predictions against one-hot labels.

  Predictions are converted to one-hot, as
  predictions[example][arg-max(example)] = 1

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)

  Returns:
    precision (scalar), weights
  """
  with tf.variable_scope("sigmoid_precision_one_hot", values=[logits, labels]):
    del weights_fn  # Accepted only for signature compatibility.
    depth = logits.shape[-1]
    # Sigmoid -> winning class index -> one-hot over the class axis.
    winner_ids = tf.argmax(tf.nn.sigmoid(logits), -1)
    one_hot_predictions = tf.one_hot(winner_ids, depth)
    # tf.metrics.precision returns (value, update_op); the update op is used.
    precision = tf.metrics.precision(
        labels=labels, predictions=one_hot_predictions)[1]
    return precision, tf.constant(1.0)
Example #8
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def sigmoid_recall_one_hot(logits, labels, weights_fn=None):
  """Recall of one-hot arg-max predictions against one-hot labels.

  Predictions are converted to one-hot, as
  predictions[example][arg-max(example)] = 1

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)

  Returns:
    recall (scalar), weights
  """
  with tf.variable_scope("sigmoid_recall_one_hot", values=[logits, labels]):
    del weights_fn  # Accepted only for signature compatibility.
    depth = logits.shape[-1]
    # Sigmoid -> winning class index -> one-hot over the class axis.
    winner_ids = tf.argmax(tf.nn.sigmoid(logits), -1)
    one_hot_predictions = tf.one_hot(winner_ids, depth)
    # tf.metrics.recall returns (value, update_op); the update op is used.
    recall = tf.metrics.recall(
        labels=labels, predictions=one_hot_predictions)[1]
    return recall, tf.constant(1.0)
Example #9
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def sigmoid_cross_entropy_one_hot(logits, labels, weights_fn=None):
  """Calculate sigmoid cross entropy for one-hot labels and logits.

  Args:
    logits: Tensor of size [batch-size, o=1, p=1, num-classes]
    labels: Tensor of size [batch-size, o=1, p=1, num-classes]
    weights_fn: Function that takes in labels and weighs examples (unused)

  Returns:
    cross_entropy (scalar), weights
  """
  scope_name = "sigmoid_cross_entropy_one_hot"
  with tf.variable_scope(scope_name, values=[logits, labels]):
    del weights_fn  # Accepted only for signature compatibility.
    loss = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=labels, logits=logits)
    return loss, tf.constant(1.0)
Example #10
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testNegativeLogPerplexityMaskedAssert(self):
  # An empty features dict (no 'targets_mask') must raise a ValueError.
  predictions = np.random.randint(4, size=(12, 12, 12, 1))
  targets = np.random.randint(4, size=(12, 12, 12, 1))
  features = {}
  expected_message = 'masked_neg_log_perplexity requires targets_mask feature'

  with self.assertRaisesRegexp(ValueError, expected_message):
    with self.test_session() as session:
      one_hot_predictions = tf.one_hot(predictions, depth=4, dtype=tf.float32)
      target_ids = tf.constant(targets, dtype=tf.int32)
      scores, _ = metrics.padded_neg_log_perplexity_with_masking(
          one_hot_predictions, target_ids, features)
      mean_score = tf.reduce_mean(scores)
      session.run(tf.global_variables_initializer())
      _ = session.run(mean_score)
Example #11
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def pearson_correlation_coefficient(predictions, labels, weights_fn=None):
  """Streaming Pearson correlation between predictions and labels.

  Args:
    predictions: The raw predictions.
    labels: The actual labels.
    weights_fn: Weighting function (deleted without being used).

  Returns:
    A pair `(pearson, weight)`: the second output of
    `streaming_pearson_correlation` and the constant weight 1.0.
  """
  del weights_fn
  streaming_outputs = contrib.metrics().streaming_pearson_correlation(
      predictions, labels)
  # Only the second element of the returned pair is reported.
  pearson = streaming_outputs[1]
  return pearson, tf.constant(1.0)


# Metrics are functions that take predictions and labels and return
# a tensor of metrics and a tensor of weights.
# If the function has "features" as an argument, it will receive the whole
# features dict as well.
# The results are passed to tf.metrics.mean to accumulate properly.
Example #12
Source File: rouge.py From tensor2tensor with Apache License 2.0 | 6 votes |
def rouge_l_fscore(predictions, labels, **unused_kwargs):
  """Approximate ROUGE-L F1 score between labels and predictions.

  This is an approximate ROUGE scoring method since we do not glue word pieces
  or decode the ids and tokenize the output.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    rouge_l_fscore: approx rouge-l f1 score, paired with the constant
      weight 1.0.
  """
  predicted_ids = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Drop the two trailing axes so that both tensors are
  # [batch_size, input_length].
  predicted_ids = tf.squeeze(predicted_ids, axis=[-1, -2])
  gold_ids = tf.squeeze(labels, axis=[-1, -2])
  # The sentence-level scorer is plain Python, hence the py_func wrapper.
  score = tf.py_func(rouge_l_sentence_level, (predicted_ids, gold_ids),
                     tf.float32)
  return score, tf.constant(1.0)
Example #13
Source File: learning_rate.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _learning_rate_warmup(warmup_steps, warmup_schedule="exp", hparams=None):
  """Learning rate warmup multiplier.

  Args:
    warmup_steps: number of warmup steps; a falsy value disables warmup.
    warmup_schedule: either "exp" or "linear".
    hparams: forwarded to `_global_step`.

  Returns:
    A tensor holding the multiplier; the constant 1.0 when warmup is disabled.
  """
  if not warmup_steps:
    return tf.constant(1.)

  tf.logging.info("Applying %s learning rate warmup for %d steps",
                  warmup_schedule, warmup_steps)

  warmup_steps = tf.to_float(warmup_steps)
  global_step = _global_step(hparams)

  if warmup_schedule == "exp":
    # Per-step factor chosen so the multiplier is 0.01 at step 0 and 1.0 at
    # step == warmup_steps.
    per_step_factor = tf.exp(tf.log(0.01) / warmup_steps)
    steps_remaining = warmup_steps - global_step
    return per_step_factor**steps_remaining

  assert warmup_schedule == "linear"
  # Straight line from `start` at step 0 to 1.0 at step == warmup_steps.
  start = tf.constant(0.35)
  slope = (tf.constant(1.) - start) / warmup_steps
  return slope * global_step + start
Example #14
Source File: rouge_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testRougeLMetricE2E(self):
  # Smoke test: the ROUGE-L metric graph must build and evaluate.
  vocab_size = 4
  batch_size = 12
  seq_length = 12
  random_ids = np.random.randint(
      vocab_size, size=(batch_size, seq_length, 1, 1))
  predictions = tf.one_hot(random_ids, depth=4, dtype=tf.float32)
  targets = np.random.randint(4, size=(12, 12, 1, 1))

  with self.test_session() as session:
    target_ids = tf.constant(targets, dtype=tf.int32)
    scores, _ = rouge.rouge_l_fscore(predictions, target_ids)
    mean_score = tf.reduce_mean(scores)
    session.run(tf.global_variables_initializer())
    session.run(mean_score)
Example #15
Source File: ssd_dataloader.py From benchmarks with Apache License 2.0 | 6 votes |
def normalize_image(images):
  """Normalize image to zero mean and unit variance.

  Subtracts `ssd_constants.NORMALIZATION_MEAN` and divides by
  `ssd_constants.NORMALIZATION_STD`, both cast to the dtype of `images`, and
  logs the two constants through the MLPerf logger.

  Args:
    images: a tensor representing images, at least 3-D.

  Returns:
    images normalized by mean and stdev.
  """
  dtype = images.dtype
  mean = tf.constant(ssd_constants.NORMALIZATION_MEAN, dtype)
  std = tf.constant(ssd_constants.NORMALIZATION_STD, dtype)
  centered = tf.subtract(images, mean)
  images = tf.divide(centered, std)

  mlperf.logger.log(
      key=mlperf.tags.DATA_NORMALIZATION_MEAN,
      value=ssd_constants.NORMALIZATION_MEAN)
  mlperf.logger.log(
      key=mlperf.tags.DATA_NORMALIZATION_STD,
      value=ssd_constants.NORMALIZATION_STD)
  return images
Example #16
Source File: transformer.py From tensor2tensor with Apache License 2.0 | 6 votes |
def transformer_tall_train_tied():
  """Hparams for training CNN/DM summarization as an LM ("tied").

  Returns:
    `transformer_tall()` hparams with the overrides below applied.
  """
  hparams = transformer_tall()

  # Multiproblem input/target limits and label weighting.
  hparams.multiproblem_max_input_length = 750
  hparams.multiproblem_max_target_length = 100
  hparams.multiproblem_schedule_max_examples = 0
  hparams.multiproblem_target_eval_only = True
  hparams.multiproblem_reweight_label_loss = True
  hparams.multiproblem_label_weight = 1.0

  # Learning-rate schedule: warmup, constant factor, then cosine decay.
  hparams.learning_rate_schedule = "linear_warmup*constant*cosdecay"
  hparams.learning_rate_constant = 2e-4
  hparams.learning_rate_warmup_steps = 8000
  # Train steps should be set to learning_rate_decay_steps or less.
  hparams.learning_rate_decay_steps = 150000

  hparams.optimizer = "true_adam"
  return hparams
Example #17
Source File: variable_mgr_util_test.py From benchmarks with Apache License 2.0 | 6 votes |
def testAppendGradientsWithLossScaleWithtNan(self):
  # With grad_has_inf_nan=True the variable update must be skipped, the loss
  # scale halved and the normal-step counter reset.
  v = tf.Variable(0)
  training_ops = []

  def get_apply_gradients_ops_func():
    return [tf.assign(v, v + 1)]

  loss_scale_params = variable_mgr_util.AutoLossScaleParams(
      enable_auto_loss_scale=True,
      loss_scale=tf.Variable(4, dtype=tf.float32),
      loss_scale_normal_steps=tf.Variable(10),
      inc_loss_scale_every_n=10,
      is_chief=True)
  variable_mgr_util.append_gradients_with_loss_scale(
      training_ops,
      get_apply_gradients_ops_func,
      loss_scale_params,
      grad_has_inf_nan=tf.constant(True))

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(training_ops)
    # Skip updating for v.
    self.assertEqual(sess.run(v), 0)
    # halve loss_scale and reset local_scale_normal_steps.
    self.assertEqual(sess.run(loss_scale_params.loss_scale), 2)
    self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)
Example #18
Source File: transformer.py From tensor2tensor with Apache License 2.0 | 6 votes |
def transformer_tall_finetune_tied():
  """Hparams for fine-tuning CNN/DM summarization as an LM ("tied").

  Returns:
    `transformer_tall()` hparams with the overrides below applied.
  """
  hparams = transformer_tall()

  # Multiproblem input/target limits and label weighting.
  hparams.multiproblem_max_input_length = 750
  hparams.multiproblem_max_target_length = 100
  hparams.multiproblem_schedule_max_examples = 0
  hparams.multiproblem_target_eval_only = True
  hparams.multiproblem_reweight_label_loss = True
  hparams.multiproblem_label_weight = 1.0

  # Learning-rate schedule: warmup, constant factor, then cosine decay.
  hparams.learning_rate_schedule = "linear_warmup*constant*cosdecay"
  hparams.learning_rate_constant = 5e-5
  hparams.learning_rate_warmup_steps = 100
  # Train steps should be set to learning_rate_decay_steps or less.
  hparams.learning_rate_decay_steps = 80000

  hparams.optimizer = "true_adam"
  return hparams
Example #19
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testPrefixAccuracy(self):
  vocab_size = 10
  # Three sequences of eight predicted ids; they first differ from the labels
  # at positions 4, 5 and 5 respectively.
  predicted_ids = tf.constant([
      [[1], [2], [3], [4], [9], [6], [7], [8]],
      [[1], [2], [3], [4], [5], [9], [7], [8]],
      [[1], [2], [3], [4], [5], [9], [7], [0]],
  ])
  predictions = tf.one_hot(predicted_ids, vocab_size)
  label_ids = tf.constant([
      [[1], [2], [3], [4], [5], [6], [7], [8]],
      [[1], [2], [3], [4], [5], [6], [7], [8]],
      [[1], [2], [3], [4], [5], [6], [7], [0]],
  ])
  labels = tf.expand_dims(label_ids, axis=-1)

  per_sequence_expected = [4.0 / 8.0, 5.0 / 8.0, 5.0 / 7.0]
  expected_accuracy = np.average(per_sequence_expected)

  accuracy, _ = metrics.prefix_accuracy(predictions, labels)
  with self.test_session() as session:
    accuracy_value = session.run(accuracy)
    self.assertAlmostEqual(expected_accuracy, accuracy_value)
Example #20
Source File: bleu_hook.py From tensor2tensor with Apache License 2.0 | 6 votes |
def bleu_score(predictions, labels, **unused_kwargs):
  """Approximate BLEU score between labels and predictions.

  An approximate BLEU scoring method since we do not glue word pieces or
  decode the ids and tokenize the output. By default, we use ngram order of 4
  and use brevity penalty (these defaults belong to `compute_bleu`). Also,
  this does not have beam search.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    bleu: float32 tensor, approx bleu score, paired with the constant
      weight 1.0.
  """
  predicted_ids = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Drop the two trailing axes so that both tensors are
  # [batch_size, input_length].
  predicted_ids = tf.squeeze(predicted_ids, axis=[-1, -2])
  gold_ids = tf.squeeze(labels, axis=[-1, -2])

  # Note the argument order: the gold ids are passed first.
  bleu = tf.py_func(compute_bleu, (gold_ids, predicted_ids), tf.float32)
  return bleu, tf.constant(1.0)
Example #21
Source File: rouge.py From tensor2tensor with Apache License 2.0 | 6 votes |
def rouge_2_fscore(predictions, labels, **unused_kwargs):
  """Approximate ROUGE-2 F1 score between labels and predictions.

  This is an approximate ROUGE scoring method since we do not glue word pieces
  or decode the ids and tokenize the output.

  Args:
    predictions: tensor, model predictions
    labels: tensor, gold output.

  Returns:
    rouge2_fscore: approx rouge-2 f1 score, paired with the constant
      weight 1.0.
  """
  predicted_ids = tf.to_int32(tf.argmax(predictions, axis=-1))
  # Drop the two trailing axes so that both tensors are
  # [batch_size, input_length].
  predicted_ids = tf.squeeze(predicted_ids, axis=[-1, -2])
  gold_ids = tf.squeeze(labels, axis=[-1, -2])
  # `rouge_n` is plain Python, hence the py_func wrapper.
  score = tf.py_func(rouge_n, (predicted_ids, gold_ids), tf.float32)
  return score, tf.constant(1.0)
Example #22
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testNegativeLogPerplexity(self):
  # The mean of the metric scores must evaluate to a scalar.
  predictions = np.random.randint(4, size=(12, 12, 12, 1))
  targets = np.random.randint(4, size=(12, 12, 12, 1))

  with self.test_session() as session:
    one_hot_predictions = tf.one_hot(predictions, depth=4, dtype=tf.float32)
    target_ids = tf.constant(targets, dtype=tf.int32)
    scores, _ = metrics.padded_neg_log_perplexity(
        one_hot_predictions, target_ids)
    mean_score = tf.reduce_mean(scores)
    session.run(tf.global_variables_initializer())
    actual = session.run(mean_score)
  self.assertEqual(actual.shape, ())
Example #23
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testNegativeLogPerplexityMasked(self):
  # With an all-ones 'targets_mask' the mean score must still be a scalar.
  predictions = np.random.randint(4, size=(12, 12, 12, 1))
  targets = np.random.randint(4, size=(12, 12, 12, 1))
  all_ones_mask = tf.to_float(tf.ones([12, 12]))
  features = {'targets_mask': all_ones_mask}

  with self.test_session() as session:
    one_hot_predictions = tf.one_hot(predictions, depth=4, dtype=tf.float32)
    target_ids = tf.constant(targets, dtype=tf.int32)
    scores, _ = metrics.padded_neg_log_perplexity_with_masking(
        one_hot_predictions, target_ids, features)
    mean_score = tf.reduce_mean(scores)
    session.run(tf.global_variables_initializer())
    actual = session.run(mean_score)
  self.assertEqual(actual.shape, ())
Example #24
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testTwoClassAccuracyMetric(self):
  # Six scores evenly spaced over [0, 1] with a fixed 0/1 target pattern; the
  # metric is expected to report an accuracy of 2/3.
  scores = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
  target_values = [0, 0, 1, 0, 1, 1]
  predictions = tf.constant(scores, dtype=tf.float32)
  targets = tf.constant(target_values, dtype=tf.int32)
  expected = 2.0 / 3.0

  with self.test_session() as session:
    accuracy, _ = metrics.two_class_accuracy(predictions, targets)
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())
    actual = session.run(accuracy)
  self.assertAlmostEqual(actual, expected)
Example #25
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testUnpaddedRMSEMetric(self):
  predictions = np.full((10, 1), 1)  # All 1's
  targets = np.full((10, 1), 3)  # All 3's
  # Reference value is the plain mean squared error: (1 - 3)**2 == 4.0.
  squared_errors = (predictions - targets)**2
  expected = np.mean(squared_errors)

  with self.test_session() as session:
    prediction_tensor = tf.constant(predictions, dtype=tf.int32)
    target_tensor = tf.constant(targets, dtype=tf.int32)
    mse, _ = metrics.unpadded_mse(prediction_tensor, target_tensor)
    session.run(tf.global_variables_initializer())
    actual = session.run(mse)
  self.assertEqual(actual, expected)
Example #26
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testPearsonCorrelationCoefficient(self):
  predictions = np.random.rand(12, 1)
  targets = np.random.rand(12, 1)

  # NumPy reference: off-diagonal entry of the 2x2 correlation matrix.
  correlation_matrix = np.corrcoef(
      np.squeeze(predictions), np.squeeze(targets))
  expected = correlation_matrix[0][1]

  with self.test_session() as session:
    prediction_tensor = tf.constant(predictions, dtype=tf.float32)
    target_tensor = tf.constant(targets, dtype=tf.float32)
    pearson, _ = metrics.pearson_correlation_coefficient(
        prediction_tensor, target_tensor)
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())
    actual = session.run(pearson)
  self.assertAlmostEqual(actual, expected)
Example #27
Source File: beam_search.py From tensor2tensor with Apache License 2.0 | 5 votes |
def _create_topk_unique(inputs, k):
  """Creates the top k values in sorted order with indices.

  The k largest values are extracted one at a time with `reduce_max`. Indices
  are read from the low bits of each selected value's float32 bit pattern.
  NOTE(review): this presumably relies on the caller having packed the column
  index into those low bits beforehand; confirm against the caller.

  Args:
    inputs: A tensor with rank of 2. [batch_size, original_size].
    k: An integer, number of top elements to select.

  Returns:
    topk_r2: A tensor, the k largest elements. [batch_size, k].
    topk_indices_r2: A tensor, indices of the top k values. [batch_size, k].
  """
  height, width = inputs.shape[0], inputs.shape[1]

  # A [height, width] tensor of -inf, used to replace NaNs and to mask out
  # values that have already been selected.
  neg_inf_scalar = tf.constant(-np.inf, dtype=tf.float32)
  all_ones = tf.ones([height, width], dtype=tf.float32)
  neg_inf_matrix = all_ones * neg_inf_scalar
  inputs = tf.where(tf.is_nan(inputs), neg_inf_matrix, inputs)

  # Select the current largest value k times and keep them in topk_values.
  # The selected largest values are marked as the smallest value to avoid
  # being selected again.
  remaining = inputs
  topk_values = tf.zeros([height, k], dtype=tf.float32)
  for slot in range(k):
    row_max = tf.reduce_max(remaining, axis=1, keepdims=True)
    # Boolean [height, k] mask that is True only in output column `slot`.
    slot_selector = tf.equal(tf.range(k), tf.fill([k], slot))
    slot_mask = tf.tile(tf.expand_dims(slot_selector, 0), [height, 1])
    topk_values = tf.where(slot_mask, tf.tile(row_max, [1, k]), topk_values)
    # Mask every original entry >= the value just selected.
    already_taken = tf.greater_equal(inputs, tf.tile(row_max, [1, width]))
    remaining = tf.where(already_taken, neg_inf_matrix, inputs)

  # Bit mask covering ceil(log2(width)) low bits.
  index_bits = int(math.ceil(math.log(float(int(width)), 2)))
  index_mask_value = (1 << index_bits) - 1
  index_mask_scalar = tf.constant(index_mask_value)
  index_mask = tf.fill([height, k], index_mask_scalar)

  topk_bits = tf.bitcast(topk_values, tf.int32)
  topk_indices = tf.bitwise.bitwise_and(topk_bits, index_mask)
  return topk_values, topk_indices
Example #28
Source File: decoding.py From tensor2tensor with Apache License 2.0 | 5 votes |
def _decode_input_tensor_to_features_dict(feature_map, hparams, decode_hp):
  """Convert the interactive input format to a features dictionary.

  Args:
    feature_map: dict with inputs.
    hparams: model hyperparameters
    decode_hp: decode hyperparameters

  Returns:
    a features dictionary, as expected by the decoder.
  """
  inputs = tf.convert_to_tensor(feature_map["inputs"])
  # Hard-coded to False here, so the image decode length is never selected.
  input_is_image = False

  p_hparams = hparams.problem_hparams
  # Add a third empty dimension
  x = tf.to_int32(tf.expand_dims(inputs, axis=[2]))

  input_space_id = tf.constant(p_hparams.input_space_id)
  target_space_id = tf.constant(p_hparams.target_space_id)
  if input_is_image:
    decode_length = IMAGE_DECODE_LENGTH
  else:
    decode_length = tf.constant(decode_hp.extra_length)

  features = {
      "input_space_id": input_space_id,
      "target_space_id": target_space_id,
      "decode_length": decode_length,
      "inputs": x,
  }

  # Save inputs to "partial_targets" when prepending inputs to targets. Also
  # keep "inputs" as some models crash if they don't exist.
  if getattr(hparams, "prepend_mode", "none") != "none":
    dynamic_shape = tf.shape(x)
    flat_inputs = tf.reshape(x, [dynamic_shape[0], dynamic_shape[1]])
    # Pad one extra zero position at the end of the second axis.
    features["partial_targets"] = tf.pad(flat_inputs, [[0, 0], [0, 1]])
  return features
Example #29
Source File: neural_gpu.py From tensor2tensor with Apache License 2.0 | 5 votes |
def diagonal_neural_gpu(inputs, hparams, name=None):
  """Improved Neural GPU as in https://arxiv.org/abs/1702.08727.

  Args:
    inputs: tensor that serves both as the initial state and, after the
      first two axes are swapped, as the sequence scanned over.
    hparams: provides num_hidden_layers, kernel_height, kernel_width,
      hidden_size and dropout.
    name: optional variable scope name (defaults to "diagonal_neural_gpu").

  Returns:
    A pair: the first entry of the scanned states along its leading axis, and
    twice the mean of the per-step losses.
  """
  with tf.variable_scope(name, "diagonal_neural_gpu"):
    kernel_size = (hparams.kernel_height, hparams.kernel_width)

    def step(state_tup, inp):
      """Single step of the improved Neural GPU."""
      prev_state, _ = state_tup
      hidden = prev_state
      for layer_idx in range(hparams.num_hidden_layers):
        hidden, layer_loss = common_layers.diagonal_conv_gru(
            hidden,
            kernel_size,
            hparams.hidden_size,
            dropout=hparams.dropout,
            name="dcgru_%d" % layer_idx)
      # Padding input is zeroed-out in the modality, we check this by summing.
      is_padding = tf.less(tf.reduce_sum(tf.abs(inp), axis=[1, 2]), 0.00001)
      # No-op where inp is padding: keep the previous state there.
      next_state = tf.where(is_padding, prev_state, hidden)
      return next_state, layer_loss

    scan_elems = tf.transpose(inputs, [1, 0, 2, 3])
    final_state, losses = tf.scan(
        step,
        scan_elems,
        initializer=(inputs, tf.constant(0.0)),
        parallel_iterations=1,
        swap_memory=True)
    return final_state[0, :, :, :, :], 2.0 * tf.reduce_mean(losses)
Example #30
Source File: learning_rate.py From tensor2tensor with Apache License 2.0 | 5 votes |
def learning_rate_schedule(hparams):
  """Learning rate schedule based on hparams.

  `hparams.learning_rate_schedule` is a "*"-separated list of factor names;
  the result is the product of `learning_rate_factor` over the non-empty
  names, starting from the constant 1.0.

  Args:
    hparams: provides learning_rate_schedule and learning_rate_warmup_steps.

  Returns:
    A tensor holding the product of all schedule factors.
  """
  mlperf_log.transformer_print(key=mlperf_log.OPT_LR, deferred=True)
  mlperf_log.transformer_print(
      key=mlperf_log.OPT_LR_WARMUP_STEPS,
      value=hparams.learning_rate_warmup_steps)

  step_num = _global_step(hparams)

  # Split on "*", trimming whitespace and dropping empty pieces.
  factor_names = [
      piece.strip()
      for piece in hparams.learning_rate_schedule.split("*")
      if piece.strip()
  ]

  ret = tf.constant(1.0)
  for factor_name in factor_names:
    ret *= learning_rate_factor(factor_name, step_num, hparams)
  return ret