Python tensorflow.compat.v1.reduce_mean() Examples
The following are 30 code examples of tensorflow.compat.v1.reduce_mean(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
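Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what tf.compat.v1.reduce_mean() does: it averages a tensor over all axes by default, or only over the axes passed in `axis`, optionally keeping the reduced dimensions when keepdims=True. The tensor values and variable names are illustrative only.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# A 2x2 example tensor; the values are arbitrary.
x = tf.constant([[1.0, 2.0],
                 [3.0, 4.0]])
overall_mean = tf.reduce_mean(x)                      # scalar: 2.5
row_means = tf.reduce_mean(x, axis=1)                 # shape [2]: [1.5, 3.5]
col_means = tf.reduce_mean(x, axis=0, keepdims=True)  # shape [1, 2]: [[2.0, 3.0]]

with tf.Session() as sess:
  print(sess.run([overall_mean, row_means, col_means]))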
Example #1
Source File: shuffle_network.py From tensor2tensor with Apache License 2.0 | 6 votes |
def loss(self, logits, features):
  """Loss function for Neural Shuffle-Exchange network.

  We use custom loss function as default loss function doesn't use padding
  for calculating loss. We assume that output string is same length as the
  input. If you need other type of output please feel free to modify this.

  Args:
    logits: Logits from model
    features: Features, not in one-hot format

  Returns:
    tf.Tensor: Loss value
  """
  onehot_labels = tf.one_hot(features["targets"],
                             self._problem_hparams.vocab_size["targets"])
  cost_vector = tf.nn.softmax_cross_entropy_with_logits_v2(
      logits=logits, labels=onehot_labels)
  return tf.reduce_mean(cost_vector)
Example #2
Source File: common_layers_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
  channels = 3
  pred = tf.random_normal([batch, height, width, num_mixtures * 10])
  labels = tf.random_uniform([batch, height, width, channels],
                             minval=0, maxval=256, dtype=tf.int32)
  actual_loss_num, actual_loss_den = common_layers.dml_loss(
      pred=pred, labels=labels, reduce_sum=reduce_sum)
  actual_loss = actual_loss_num / actual_loss_den

  real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
  expected_loss = common_layers.discretized_mix_logistic_loss(
      pred=pred, labels=real_labels) / channels
  if reduce_sum:
    expected_loss = tf.reduce_mean(expected_loss)

  actual_loss_val, expected_loss_val = self.evaluate(
      [actual_loss, expected_loss])
  self.assertAllClose(actual_loss_val, expected_loss_val)
Example #3
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def double_discriminator(x, filters1=128, filters2=None, kernel_size=8,
                         strides=4, pure_mean=False):
  """A convolutional discriminator with 2 layers and concatenated output."""
  if filters2 is None:
    filters2 = 4 * filters1
  with tf.variable_scope("discriminator"):
    batch_size = shape_list(x)[0]
    net = layers().Conv2D(
        filters1, kernel_size, strides=strides, padding="SAME",
        name="conv1")(x)
    if pure_mean:
      net1 = tf.reduce_mean(net, [1, 2])
    else:
      net1 = mean_with_attention(net, "mean_with_attention1")
      tf.reshape(net, [batch_size, -1])
    net = tf.nn.relu(net)
    net = layers().Conv2D(
        filters2, kernel_size, strides=strides, padding="SAME",
        name="conv2")(net)
    if pure_mean:
      net2 = tf.reduce_mean(net, [1, 2])
    else:
      net2 = mean_with_attention(net, "mean_with_attention2")
    return tf.concat([net1, net2], axis=-1)
Example #4
Source File: metrics.py From tensor2tensor with Apache License 2.0 | 6 votes |
def two_class_log_likelihood(predictions, labels, weights_fn=None):
  """Log-likelihood for two class classification with 0/1 labels.

  Args:
    predictions: A float valued tensor of shape [`batch_size`]. Each
      component should be between 0 and 1.
    labels: An int valued tensor of shape [`batch_size`]. Each component
      should either be 0 or 1.
    weights_fn: unused.

  Returns:
    A pair, with the average log likelihood in the first component.
  """
  del weights_fn
  float_predictions = tf.cast(tf.squeeze(predictions), dtype=tf.float64)
  batch_probs = tf.stack([1. - float_predictions, float_predictions], axis=-1)
  int_labels = tf.cast(tf.squeeze(labels), dtype=tf.int32)
  onehot_targets = tf.cast(tf.one_hot(int_labels, 2), dtype=tf.float64)
  chosen_probs = tf.einsum(
      "ij,ij->i", batch_probs, onehot_targets, name="chosen_probs")
  avg_log_likelihood = tf.reduce_mean(tf.log(chosen_probs))
  return avg_log_likelihood, tf.constant(1.0)
Example #5
Source File: cycle_gan.py From tensor2tensor with Apache License 2.0 | 6 votes |
def lossfn(real_input, fake_input, compress, hparams, lsgan, name):
  """Loss function."""
  eps = 1e-12
  with tf.variable_scope(name):
    d1 = discriminator(real_input, compress, hparams, "discriminator")
    d2 = discriminator(fake_input, compress, hparams, "discriminator",
                       reuse=True)
    if lsgan:
      dloss = tf.reduce_mean(
          tf.squared_difference(d1, 0.9)) + tf.reduce_mean(tf.square(d2))
      gloss = tf.reduce_mean(tf.squared_difference(d2, 0.9))
      loss = (dloss + gloss)/2
    else:  # cross_entropy
      dloss = -tf.reduce_mean(
          tf.log(d1 + eps)) - tf.reduce_mean(tf.log1p(eps - d2))
      gloss = -tf.reduce_mean(tf.log(d2 + eps))
      loss = (dloss + gloss)/2
    return loss
Example #6
Source File: discretization.py From tensor2tensor with Apache License 2.0 | 6 votes |
def vae(x, z_size, name=None):
  """Simple variational autoencoder without discretization.

  Args:
    x: Input to the discretization bottleneck.
    z_size: Number of bits, where discrete codes range from 1 to 2**z_size.
    name: Name for the bottleneck scope.

  Returns:
    Embedding function, latent, loss, mu and log_sigma.
  """
  with tf.variable_scope(name, default_name="vae"):
    mu = tf.layers.dense(x, z_size, name="mu")
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    shape = common_layers.shape_list(x)
    epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
    return z, kl_loss, mu, log_sigma
Example #7
Source File: model.py From benchmarks with Apache License 2.0 | 6 votes |
def loss_function(self, inputs, build_network_result):
  """Returns the op to measure the loss of the model."""
  logits = build_network_result.logits
  _, labels = inputs
  # TODO(laigd): consider putting the aux logit in the Inception model,
  # which could call super.loss_function twice, once with the normal logits
  # and once with the aux logits.
  aux_logits = build_network_result.extra_info
  with tf.name_scope('xentropy'):
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN,
                      value=mlperf.tags.CCE)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        logits=logits, labels=labels)
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
  if aux_logits is not None:
    with tf.name_scope('aux_xentropy'):
      aux_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
          logits=aux_logits, labels=labels)
      aux_loss = 0.4 * tf.reduce_mean(aux_cross_entropy, name='aux_loss')
      loss = tf.add_n([loss, aux_loss])
  return loss
Example #8
Source File: evolved_transformer_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _create_greedy_infer_model(self):
  """Creates model for greedy inference testing.

  Returns:
    model: A t2t model.
    features: A map of string to tensor.
  """
  model, features = get_model(transformer.transformer_tiny())

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(10):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  return model, features
Example #9
Source File: ssd_model.py From benchmarks with Apache License 2.0 | 6 votes |
def loss_function(self, inputs, build_network_result):
  logits = build_network_result.logits
  # Unpack model output back to locations and confidence scores of predictions
  # Shape of pred_loc: [batch_size, NUM_SSD_BOXES, 4]
  # Shape of pred_label: [batch_size, NUM_SSD_BOXES, label_num]
  pred_loc, pred_label = tf.split(logits, [4, self.label_num], 2)

  # Shape of gt_loc: [batch_size, NUM_SSD_BOXES, 4]
  # Shape of gt_label: [batch_size, NUM_SSD_BOXES, 1]
  # Shape of num_gt: [batch_size]
  _, gt_loc, gt_label, num_gt = inputs

  gt_label = tf.cast(gt_label, tf.int32)

  box_loss = self._localization_loss(pred_loc, gt_loc, gt_label, num_gt)
  class_loss = self._classification_loss(pred_label, gt_label, num_gt)

  tf.summary.scalar('box_loss', tf.reduce_mean(box_loss))
  tf.summary.scalar('class_loss', tf.reduce_mean(class_loss))
  return class_loss + box_loss
Example #10
Source File: utils.py From lamb with Apache License 2.0 | 6 votes |
def layer_norm(x, reduction_indices, epsilon=1e-9, gain=None, bias=None,
               per_element=True, scope=None):
  """DOC."""
  reduction_indices = ensure_list(reduction_indices)
  mean = tf.reduce_mean(x, reduction_indices, keep_dims=True)
  variance = tf.reduce_mean(tf.squared_difference(x, mean),
                            reduction_indices, keep_dims=True)
  normalized = (x - mean) / tf.sqrt(variance + epsilon)
  dtype = x.dtype
  shape = x.get_shape().as_list()
  for i in six.moves.range(len(shape)):
    if i not in reduction_indices or not per_element:
      shape[i] = 1
  with tf.variable_scope(scope or 'layer_norm'):
    if gain is None:
      gain = tf.get_variable('gain', shape=shape, dtype=dtype,
                             initializer=tf.ones_initializer())
    if bias is None:
      bias = tf.get_variable('bias', shape=shape, dtype=dtype,
                             initializer=tf.zeros_initializer())
    return gain * normalized + bias
Example #11
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testAccuracyTopKMetric(self):
  predictions = np.random.randint(1, 5, size=(12, 12, 12, 1))
  targets = np.random.randint(1, 5, size=(12, 12, 12, 1))
  expected = np.mean((predictions == targets).astype(float))
  with self.test_session() as session:
    predicted = tf.one_hot(predictions, depth=5, dtype=tf.float32)
    scores1, _ = metrics.padded_accuracy_topk(
        predicted, tf.constant(targets, dtype=tf.int32), k=1)
    scores2, _ = metrics.padded_accuracy_topk(
        predicted, tf.constant(targets, dtype=tf.int32), k=7)
    a1 = tf.reduce_mean(scores1)
    a2 = tf.reduce_mean(scores2)
    session.run(tf.global_variables_initializer())
    actual1, actual2 = session.run([a1, a2])
  self.assertAlmostEqual(actual1, expected)
  self.assertAlmostEqual(actual2, 1.0)
Example #12
Source File: transformer_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _create_greedy_infer_model(self):
  """Creates model for greedy inference testing.

  Returns:
    model: A t2t model.
    features: A map of string to tensor.
  """
  model, features = get_model(transformer.transformer_small())

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(100):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  return model, features
Example #13
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def patch_discriminator(x, filters=64, filter_size=5, n=4,
                        name="patch_discrim"):
  """Patch discriminator."""
  with tf.variable_scope(name):
    x_shape = shape_list(x)
    spatial_dims = [x_shape[1] // 4, x_shape[2] // 4]
    x = tf.random_crop(x, [x_shape[0]] + spatial_dims + [x_shape[3]])
    for i in range(n):
      x = general_conv(
          x=x,
          num_filters=filters * 2**i,
          filter_size=filter_size,
          stride=2 if i != n - 1 else 1,
          stddev=0.02,
          padding="SAME",
          name="c%d" % i,
          do_norm="instance" if i != 0 else False,
          do_relu=i != n - 1,
          relufactor=0.2)
    x = tf.reduce_mean(x, [1, 2])
    return x
Example #14
Source File: transformer_nat.py From tensor2tensor with Apache License 2.0 | 6 votes |
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #15
Source File: base_vae.py From tensor2tensor with Apache License 2.0 | 6 votes |
def get_kl_loss(self, means, log_vars, means_p=None, log_vars_p=None):
  """Get KL loss for all the predicted Gaussians."""
  kl_loss = 0.0
  if means_p is None:
    means_p = tf.unstack(tf.zeros_like(means))
  if log_vars_p is None:
    log_vars_p = tf.unstack(tf.zeros_like(log_vars))
  enumerated_inputs = enumerate(zip(means, log_vars, means_p, log_vars_p))
  if self.is_training and self.hparams.stochastic_model:
    for i, (mean, log_var, mean_p, log_var_p) in enumerated_inputs:
      kl_loss += common_layers.kl_divergence(mean, log_var, mean_p, log_var_p)
      tf.summary.histogram("posterior_mean_%d" % i, mean)
      tf.summary.histogram("posterior_log_var_%d" % i, log_var)
      tf.summary.histogram("prior_mean_%d" % i, mean_p)
      tf.summary.histogram("prior_log_var_%d" % i, log_var_p)
    tf.summary.scalar("kl_raw", tf.reduce_mean(kl_loss))

  beta = self.get_beta(kl_loss)
  # information capacity from "Understanding disentangling in beta-VAE"
  if self.hparams.information_capacity > 0.0:
    kl_loss = tf.abs(kl_loss - self.hparams.information_capacity)
  return beta * kl_loss
Example #16
Source File: autoencoders.py From tensor2tensor with Apache License 2.0 | 6 votes |
def bottleneck(self, x):
  hparams = self.hparams
  z_size = hparams.bottleneck_bits
  x_shape = common_layers.shape_list(x)
  with tf.variable_scope("vae"):
    mu = tf.layers.dense(x, z_size, name="mu")
    if hparams.mode != tf.estimator.ModeKeys.TRAIN:
      return mu, 0.0  # No sampling or kl loss on eval.
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    epsilon = tf.random_normal(x_shape[:-1] + [z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.expm1(log_sigma) + tf.square(mu) - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
  return z, kl_loss * hparams.kl_beta
Example #17
Source File: similarity_transformer.py From tensor2tensor with Apache License 2.0 | 6 votes |
def encode(self, features, input_key):
  hparams = self._hparams
  inputs = common_layers.flatten4d3d(features[input_key])

  (encoder_input, encoder_self_attention_bias, _) = (
      transformer.transformer_prepare_encoder(inputs, problem.SpaceID.EN_TOK,
                                              hparams))

  encoder_input = tf.nn.dropout(encoder_input,
                                1.0 - hparams.layer_prepostprocess_dropout)
  encoder_output = transformer.transformer_encoder(
      encoder_input,
      encoder_self_attention_bias,
      hparams,
      nonpadding=transformer.features_to_nonpadding(features, input_key))
  encoder_output = tf.reduce_mean(encoder_output, axis=1)

  return encoder_output
Example #18
Source File: rouge_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testRougeLMetricE2E(self):
  vocab_size = 4
  batch_size = 12
  seq_length = 12
  predictions = tf.one_hot(
      np.random.randint(vocab_size, size=(batch_size, seq_length, 1, 1)),
      depth=4,
      dtype=tf.float32)
  targets = np.random.randint(4, size=(12, 12, 1, 1))
  with self.test_session() as session:
    scores, _ = rouge.rouge_l_fscore(
        predictions,
        tf.constant(targets, dtype=tf.int32))
    a = tf.reduce_mean(scores)
    session.run(tf.global_variables_initializer())
    session.run(a)
Example #19
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def summarize_features(features, num_shards=1):
  """Generate summaries for features."""
  if not common_layers.should_generate_summaries():
    return

  with tf.name_scope("input_stats"):
    for (k, v) in sorted(six.iteritems(features)):
      if (isinstance(v, tf.Tensor) and (v.get_shape().ndims > 1) and
          (v.dtype != tf.string)):
        tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards)
        tf.summary.scalar("%s_length" % k, tf.shape(v)[1])
        nonpadding = tf.to_float(tf.not_equal(v, 0))
        nonpadding_tokens = tf.reduce_sum(nonpadding)
        tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
        tf.summary.scalar("%s_nonpadding_fraction" % k,
                          tf.reduce_mean(nonpadding))
Example #20
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def average_sharded_losses(sharded_losses):
  """Average losses across datashards.

  Args:
    sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss
      can be a single Tensor or a 2-tuple (numerator and denominator).

  Returns:
    losses: dict<str loss_name, Tensor avg_loss>
  """
  losses = {}
  for loss_name in sorted(sharded_losses[0]):
    all_shards = [shard_losses[loss_name] for shard_losses in sharded_losses]
    if isinstance(all_shards[0], tuple):
      sharded_num, sharded_den = zip(*all_shards)
      mean_loss = (
          tf.add_n(sharded_num) / tf.maximum(
              tf.cast(1.0, sharded_den[0].dtype), tf.add_n(sharded_den)))
    else:
      mean_loss = tf.reduce_mean(all_shards)

    losses[loss_name] = mean_loss
  return losses
Example #21
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 6 votes |
def layer_norm_compute(x, epsilon, scale, bias, layer_collection=None):
  """Layer norm raw computation."""

  # Save these before they get converted to tensors by the casting below
  params = (scale, bias)

  epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
  mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
  variance = tf.reduce_mean(
      tf.squared_difference(x, mean), axis=[-1], keepdims=True)
  norm_x = (x - mean) * tf.rsqrt(variance + epsilon)

  output = norm_x * scale + bias

  return output
Example #22
Source File: metrics_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def testTwoClassLogLikelihoodVersusOldImplementation(self):
  def alt_two_class_log_likelihood_impl(predictions, labels):
    float_labels = tf.cast(labels, dtype=tf.float64)
    float_predictions = tf.cast(tf.squeeze(predictions), dtype=tf.float64)
    # likelihood should be just p for class 1, and 1 - p for class 0.
    # signs is 1 for class 1, and -1 for class 0
    signs = 2 * float_labels - tf.ones_like(float_labels)
    # constant_term is 1 for class 0, and 0 for class 1.
    constant_term = tf.ones_like(float_labels) - float_labels
    likelihoods = constant_term + signs * float_predictions
    log_likelihoods = tf.log(likelihoods)
    avg_log_likelihood = tf.reduce_mean(log_likelihoods)
    return avg_log_likelihood

  predictions = np.random.rand(1, 10, 1)
  targets = np.random.randint(2, size=10)
  with self.test_session() as session:
    new_log_likelihood, _ = metrics.two_class_log_likelihood(
        predictions, targets)
    alt_log_likelihood = alt_two_class_log_likelihood_impl(
        predictions, targets)
    new_impl, alt_impl = session.run([new_log_likelihood, alt_log_likelihood])
  self.assertAlmostEqual(new_impl, alt_impl)
Example #23
Source File: savp.py From tensor2tensor with Apache License 2.0 | 5 votes |
def g_step(self, gen_frames, fake_logits_stop):
  """Performs the generator step in computing the GAN loss.

  Args:
    gen_frames: Generated frames
    fake_logits_stop: Logits corresponding to the generated frames as per
      the discriminator. Assumed to have a stop-gradient term.

  Returns:
    gan_g_loss_pos_d: Loss.
    gan_g_loss_neg_d: -gan_g_loss_pos_d but with a stop gradient on generator.
  """
  hparam_to_gen_loss = {
      "least_squares": gan_losses.least_squares_generator_loss,
      "cross_entropy": gan_losses.modified_generator_loss,
      "wasserstein": gan_losses.wasserstein_generator_loss
  }

  fake_logits = self.discriminator(gen_frames)
  mean_fake_logits = tf.reduce_mean(fake_logits)
  tf.summary.scalar("mean_fake_logits", mean_fake_logits)

  # Generator loss.
  # Using gan_g_loss_pos_d updates the discriminator as well.
  # To avoid this add gan_g_loss_neg_d = -gan_g_loss_pos_d
  # but with stop gradient on the generator.
  # This makes sure that the net gradient on the discriminator is zero and
  # net-gradient on the generator is just due to the gan_g_loss_pos_d.
  generator_loss_func = hparam_to_gen_loss[self.hparams.gan_loss]
  gan_g_loss_pos_d = generator_loss_func(
      discriminator_gen_outputs=fake_logits, add_summaries=True)
  gan_g_loss_neg_d = -generator_loss_func(
      discriminator_gen_outputs=fake_logits_stop, add_summaries=True)
  return gan_g_loss_pos_d, gan_g_loss_neg_d
Example #24
Source File: area_attention_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def test2DAreaMax(self):
  batch_size = 256
  feature_len = 100
  memory_height = 10
  heads = 2
  key_len = 6
  depth = 128
  max_area_height = 3
  max_area_width = 3
  queries = tf.random_uniform([batch_size, heads, key_len, depth],
                              minval=-10.0, maxval=10.0)
  features = tf.random_uniform([batch_size, heads, feature_len, depth],
                               minval=-10.0, maxval=10.0)
  target_values = tf.random_uniform([batch_size, heads, key_len, depth],
                                    minval=-0.2, maxval=0.2)
  keys = tf.layers.dense(features, units=depth)
  values = tf.layers.dense(features, units=depth)
  max_attention = area_attention.dot_product_area_attention(
      queries, keys, values,
      bias=None,
      area_key_mode="max",
      area_value_mode="max",
      name="max_key",
      max_area_width=max_area_width,
      max_area_height=max_area_height,
      memory_height=memory_height)
  max_gradients = tf.gradients(tf.reduce_mean(
      tf.pow(target_values - max_attention, 2)), features)
  with self.test_session() as session:
    session.run(tf.global_variables_initializer())
    result1, result2 = session.run([max_gradients, max_attention])
  self.assertFalse(np.any(np.logical_not(np.isfinite(result1))))
  self.assertFalse(np.any(np.logical_not(np.isfinite(result2))))
Example #25
Source File: evolved_transformer_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testBeamVsFast(self):
  model, features = get_model(transformer.transformer_tiny())

  decode_length = DECODE_LENGTH

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(10):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    beam_result = model._beam_decode_slow(
        features, decode_length, beam_size=4, top_beams=1,
        alpha=1.0)["outputs"]

    fast_result = model._beam_decode(
        features, decode_length, beam_size=4, top_beams=1,
        alpha=1.0)["outputs"]

  with self.test_session():
    beam_res = beam_result.eval()
    fast_res = fast_result.eval()

  self.assertAllClose(beam_res, fast_res)
Example #26
Source File: evolved_transformer_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testSlowVsFastNoInput(self):
  model, features = get_model(transformer.transformer_tiny(), has_input=False)

  decode_length = DECODE_LENGTH

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(10):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    slow_result = model._slow_greedy_infer(features, decode_length)["outputs"]
    slow_result = tf.squeeze(slow_result, axis=[2, 3])

    fast_result = model._greedy_infer(features, decode_length)["outputs"]

  with self.test_session():
    slow_res = slow_result.eval()
    fast_res = fast_result.eval()

  self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
  self.assertAllClose(slow_res, fast_res)
Example #27
Source File: transformer_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testSlowVsFast(self, get_model_fn=None, p=None):
  if get_model_fn:
    model, features = get_model_fn(param_overrides=p)
  else:
    model, features = get_model(transformer.transformer_small())

  decode_length = 3

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(100):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    greedy_result = model._slow_greedy_infer(
        features, decode_length)["outputs"]
    greedy_result = tf.squeeze(greedy_result, axis=[2, 3])

    fast_result = model._greedy_infer(features, decode_length)["outputs"]

  with self.test_session():
    greedy_res = greedy_result.eval()
    fast_res = fast_result.eval()

  self.assertEqual(fast_res.shape,
                   (BATCH_SIZE, INPUT_LENGTH + decode_length))
  self.assertAllClose(greedy_res, fast_res)
Example #28
Source File: transformer_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testBeamVsFast(self):
  model, features = get_model(transformer.transformer_small())

  decode_length = 2

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(100):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    beam_result = model._beam_decode_slow(
        features, decode_length, beam_size=4, top_beams=1,
        alpha=1.0)["outputs"]

    fast_result = model._beam_decode(
        features, decode_length, beam_size=4, top_beams=1,
        alpha=1.0)["outputs"]

  with self.test_session():
    beam_res = beam_result.eval()
    fast_res = fast_result.eval()

  self.assertAllClose(beam_res, fast_res)
Example #29
Source File: slicenet.py From tensor2tensor with Apache License 2.0 | 5 votes |
def slicenet_internal(inputs, targets, target_space, hparams,
                      run_decoder=True):
  """The slicenet model, main step used for training."""
  with tf.variable_scope("slicenet"):
    # Project to hidden size if necessary
    if inputs.get_shape().as_list()[-1] != hparams.hidden_size:
      inputs = common_layers.conv_block(
          inputs,
          hparams.hidden_size, [((1, 1), (3, 3))],
          first_relu=False,
          padding="SAME",
          force2d=True)

    # Flatten inputs and encode.
    inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
    inputs_mask = 1.0 - embedding_to_padding(inputs)
    inputs = common_layers.add_timing_signal(inputs)  # Add position info.
    target_space_emb = embed_target_space(target_space, hparams.hidden_size)
    extra_layers = int(hparams.num_hidden_layers * 1.5)
    inputs_encoded = multi_conv_res(
        inputs, "SAME", "encoder", extra_layers, hparams, mask=inputs_mask)
    if not run_decoder:
      return inputs_encoded
    # Do the middle part.
    decoder_start, similarity_loss = slicenet_middle(
        inputs_encoded, targets, target_space_emb, inputs_mask, hparams)
    # Decode.
    decoder_final = multi_conv_res(
        decoder_start,
        "LEFT",
        "decoder",
        hparams.num_hidden_layers,
        hparams,
        mask=inputs_mask,
        source=inputs_encoded)
    return decoder_final, tf.reduce_mean(similarity_loss)
Example #30
Source File: transformer_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testSlowVsFastNoInput(self):
  model, features = get_model(
      transformer.transformer_small(), has_input=False)

  decode_length = 3

  out_logits, _ = model(features)
  out_logits = tf.squeeze(out_logits, axis=[2, 3])
  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]),
      labels=tf.reshape(features["targets"], [-1]))
  loss = tf.reduce_mean(loss)
  apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss)

  with self.test_session():
    tf.global_variables_initializer().run()
    for _ in range(100):
      apply_grad.run()

  model.set_mode(tf.estimator.ModeKeys.PREDICT)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    slow_result = model._slow_greedy_infer(
        features, decode_length)["outputs"]
    slow_result = tf.squeeze(slow_result, axis=[2, 3])

    fast_result = model._greedy_infer(features, decode_length)["outputs"]

  with self.test_session():
    slow_res = slow_result.eval()
    fast_res = fast_result.eval()

  self.assertEqual(slow_res.shape, (BATCH_SIZE, decode_length))
  self.assertAllClose(slow_res, fast_res)