Python tensorflow.multinomial() Examples
The following are 30 code examples of tensorflow.multinomial(), each taken from an open-source project; the project, source file, and license are listed above each example.
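Before the examples, a minimal sketch of the call itself may help (the logit values and sample count here are made up for illustration; the snippet assumes the TensorFlow 1.x graph-and-session API). tf.multinomial draws integer class indices from a categorical distribution parameterized by a 2-D tensor of unnormalized log-probabilities; in TensorFlow 2.x the same op is exposed as tf.random.categorical.

import tensorflow as tf

# Unnormalized log-probabilities (logits) with shape [batch_size, num_classes].
logits = tf.constant([[1.0, 2.0, 3.0]])

# Draw 5 independent class indices per row; the result has shape
# [batch_size, num_samples] and dtype int64.
samples = tf.multinomial(logits, num_samples=5)

with tf.Session() as sess:
    print(sess.run(samples))  # e.g. [[2 1 2 2 0]], indices into the last axis of `logits`

Most of the examples below follow the same pattern: reshape logits to rank 2, optionally divide by a temperature, sample with tf.multinomial, then reshape the sampled indices back to the original batch shape.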
Example #1
Source File: goal_nav_agent.py From streetlearn with Apache License 2.0 | 6 votes |
def _head(self, policy_input, heading, xy, target_xy):
  """Build the head of the agent: linear policy and value function, and pass
  the auxiliary outputs through.
  """
  # Linear policy and value function.
  policy_logits = snt.Linear(
      self._num_actions, name='policy_logits')(policy_input)
  baseline = tf.squeeze(snt.Linear(1, name='baseline')(policy_input),
                        axis=-1)

  # Sample an action from the policy.
  new_action = tf.multinomial(
      policy_logits, num_samples=1, output_dtype=tf.int32)
  new_action = tf.squeeze(new_action, 1, name='new_action')

  return AgentOutput(
      new_action, policy_logits, baseline, heading, xy, target_xy)
Example #2
Source File: transformer_nat.py From BERT with Apache License 2.0 | 6 votes |
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #3
Source File: complete_scan.py From ScanComplete with Apache License 2.0 | 6 votes |
def predict_from_model(logit_groups_geometry, logit_groups_semantics,
                       temperature):
  """Reconstruct predicted geometry and semantics from model output."""
  predictions_geometry_list = []
  for logit_group in logit_groups_geometry:
    if FLAGS.p_norm > 0:
      predictions_geometry_list.append(logit_group[:, :, :, :, 0])
    else:
      logit_group_shape = logit_group.shape_as_list()
      logit_group = tf.reshape(logit_group, [-1, logit_group_shape[-1]])
      samples = tf.multinomial(temperature * logit_group, 1)
      predictions_geometry_list.append(
          tf.reshape(samples, logit_group_shape[:-1]))
  predictions_semantics_list = []
  if FLAGS.predict_semantics:
    for logit_group in logit_groups_semantics:
      predictions_semantics_list.append(tf.argmax(logit_group, 4))
  else:
    predictions_semantics_list = [
        tf.zeros(shape=predictions_geometry_list[0].shape, dtype=tf.uint8)
    ] * len(predictions_geometry_list)
  return predictions_geometry_list, predictions_semantics_list
Example #4
Source File: latent_layers.py From BERT with Apache License 2.0 | 6 votes |
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples
Example #5
Source File: pg_reinforce.py From tensorflow-reinforce with MIT License | 6 votes |
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
        """ simple helper function here that takes unnormalized logprobs """
        maxy = np.amax(y)
        e = np.exp(y - maxy)
        return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
        return random.randint(0, self.num_actions-1)
    else:
        action_scores = self.session.run(self.action_scores, {self.states: states})[0]
        action_probs = softmax(action_scores) - 1e-5
        action = np.argmax(np.random.multinomial(1, action_probs))
        return action
Example #6
Source File: base_controller.py From EAS with MIT License | 6 votes |
def build_forward(self, _input):
    output = _input  # [batch_size, num_steps, rnn_units]
    feature_dim = int(output.get_shape()[2])  # rnn_units
    output = tf.reshape(output, [-1, feature_dim])  # [batch_size * num_steps, rnn_units]
    final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax'
    if self.net_type == 'simple':
        net_config = [] if self.net_config is None else self.net_config
        with tf.variable_scope('wider_actor'):
            for layer in net_config:
                units, activation = layer.get('units'), layer.get('activation', 'relu')
                output = BasicModel.fc_layer(output, units, use_bias=True)
                output = BasicModel.activation(output, activation)
            logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True)  # [batch_size * num_steps, out_dim]
            probs = BasicModel.activation(logits, final_activation)  # [batch_size * num_steps, out_dim]
            probs_dim = self.out_dim
            if self.out_dim == 1:
                probs = tf.concat([1 - probs, probs], axis=1)
                probs_dim = 2

            self.decision = tf.multinomial(tf.log(probs), 1)  # [batch_size * num_steps, 1]
            self.decision = tf.reshape(self.decision, [-1, self.num_steps])  # [batch_size, num_steps]
            self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
    else:
        raise ValueError('Do not support %s' % self.net_type)
Example #7
Source File: transformer_nat.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #8
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def sample_with_temperature(logits, temperature):
  """Either argmax or random sampling.

  Args:
    logits: a Tensor.
    temperature: a float  0.0=argmax 1.0=random

  Returns:
    a Tensor with one fewer dimension than logits.
  """
  if temperature == 0.0:
    # TF argmax doesn't handle >5 dimensions, so we reshape here.
    logits_shape = shape_list(logits)
    argmax = tf.argmax(tf.reshape(logits, [-1, logits_shape[-1]]), axis=1)
    return tf.reshape(argmax, logits_shape[:-1])
  else:
    assert temperature > 0.0
    reshaped_logits = (
        tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         shape_list(logits)[:logits.get_shape().ndims - 1])
    return choices
Example #9
Source File: latent_layers.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples
Example #10
Source File: discretization.py From fine-lm with MIT License | 6 votes |
def vq_nearest_neighbor(x, means, soft_em=False, num_samples=10):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = common_layers.shape_list(means)[0]
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if soft_em:
    x_means_idx = tf.multinomial(-dist, num_samples=num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=common_layers.shape_list(means)[0])
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
  x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
  x_means = tf.matmul(x_means_hot_flat, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #11
Source File: graph_factory.py From neural-symbolic-machines with Apache License 2.0 | 6 votes |
def create_softmax_from_logits(logits):
  "Create nodes for softmax computation from logits."
  temperature = tf.placeholder_with_default(
      1.0, shape=(), name='temperature')
  logits = logits / temperature

  logits_shape = tf.shape(logits)
  logits_dim = logits_shape[-1]
  logits_2d = tf.reshape(logits, [-1, logits_dim])
  samples = tf.multinomial(logits_2d, 1)
  samples = tf.reshape(samples, logits_shape[:-1])

  probs = tf.nn.softmax(logits)
  predictions = tf.argmax(probs, axis=2)

  return logits, probs, predictions, samples, temperature


# Embedding
Example #12
Source File: latent_layers.py From fine-lm with MIT License | 6 votes |
def multinomial_sample(x, vocab_size, sampling_method, temperature):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  if sampling_method == "random":
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples
Example #13
Source File: transformer_nat.py From fine-lm with MIT License | 6 votes |
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss
Example #14
Source File: pg_actor_critic.py From Codes-for-RL-PER with MIT License | 6 votes |
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
        """ simple helper function here that takes unnormalized logprobs """
        maxy = np.amax(y)
        e = np.exp(y - maxy)
        return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
        return random.randint(0, self.num_actions-1)
    else:
        action_scores = self.session.run(self.action_scores, {self.states: states})[0]
        action_probs = softmax(action_scores) - 1e-5
        action = np.argmax(np.random.multinomial(1, action_probs))
        return action
Example #15
Source File: pg_actor_critic.py From tensorflow-reinforce with MIT License | 6 votes |
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
        """ simple helper function here that takes unnormalized logprobs """
        maxy = np.amax(y)
        e = np.exp(y - maxy)
        return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
        return random.randint(0, self.num_actions-1)
    else:
        action_scores = self.session.run(self.action_scores, {self.states: states})[0]
        action_probs = softmax(action_scores) - 1e-5
        action = np.argmax(np.random.multinomial(1, action_probs))
        return action
Example #16
Source File: policy_net.py From gail_ppo_tf with MIT License | 5 votes |
def __init__(self, name: str, env):
    """
    :param name: string
    :param env: gym env
    """

    ob_space = env.observation_space
    act_space = env.action_space

    with tf.variable_scope(name):
        self.obs = tf.placeholder(dtype=tf.float32, shape=[None] + list(ob_space.shape), name='obs')

        with tf.variable_scope('policy_net'):
            layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
            layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
            layer_3 = tf.layers.dense(inputs=layer_2, units=act_space.n, activation=tf.tanh)
            self.act_probs = tf.layers.dense(inputs=layer_3, units=act_space.n, activation=tf.nn.softmax)

        with tf.variable_scope('value_net'):
            layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
            layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
            self.v_preds = tf.layers.dense(inputs=layer_2, units=1, activation=None)

        self.act_stochastic = tf.multinomial(tf.log(self.act_probs), num_samples=1)
        self.act_stochastic = tf.reshape(self.act_stochastic, shape=[-1])

        self.act_deterministic = tf.argmax(self.act_probs, axis=1)

        self.scope = tf.get_variable_scope().name
Example #17
Source File: common_layers.py From BERT with Apache License 2.0 | 5 votes |
def sample_with_temperature(logits, temperature, sampling_keep_top_k=-1):
  """Either argmax or random sampling.

  Args:
    logits: a Tensor.
    temperature: a float  0.0=argmax 1.0=random
    sampling_keep_top_k: If not -1, only sample from the top k logits.

  Returns:
    a Tensor with one fewer dimension than logits.
  """
  if temperature == 0.0:
    # TF argmax doesn't handle >5 dimensions, so we reshape here.
    logits_shape = shape_list(logits)
    argmax = tf.argmax(tf.reshape(logits, [-1, logits_shape[-1]]), axis=1)
    return tf.reshape(argmax, logits_shape[:-1])
  else:
    assert temperature > 0.0

    if sampling_keep_top_k != -1:
      if sampling_keep_top_k <= 0:
        raise ValueError("sampling_keep_top_k must either be -1 or positive.")
      vocab_size = shape_list(logits)[1]

      k_largest = tf.contrib.nn.nth_element(
          logits, n=sampling_keep_top_k, reverse=True)
      k_largest = tf.tile(tf.reshape(k_largest, [-1, 1]), [1, vocab_size])

      # Force every position that is not in the top k to have probability near
      # 0 by setting the logit to be very negative.
      logits = tf.where(tf.less_equal(logits, k_largest),
                        tf.ones_like(logits)*-1e6, logits)

    reshaped_logits = (
        tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         shape_list(logits)[:logits.get_shape().ndims - 1])
    return choices
Example #18
Source File: multinomial_op_test.py From deep_image_model with Apache License 2.0 | 5 votes |
def testLargeLogits(self):
  for neg in [True, False]:
    with self.test_session(use_gpu=self.use_gpu):
      logits = np.array([[1000.] * 5])
      if neg:
        logits *= -1
      samples = tf.multinomial(logits, 10).eval()
      # Sampled classes should be in-range.
      self.assertTrue((samples >= 0).all())
      self.assertTrue((samples < 5).all())
Example #19
Source File: decoders.py From DeepChatModels with MIT License | 5 votes |
def sample(self, projected_output):
    """Return integer ID tensor representing the sampled word.

    Args:
        projected_output: Tensor [1, 1, state_size], representing a single
            decoder timestep output.
    """
    # TODO: We really need a tf.control_dependencies check here (for rank).
    with tf.name_scope('decoder_sampler', values=[projected_output]):
        # Protect against extra size-1 dimensions; grab the 1D tensor
        # of size state_size.
        logits = tf.squeeze(projected_output)
        if self.temperature < 0.02:
            return tf.argmax(logits, axis=0)

        # Convert logits to probability distribution.
        probabilities = tf.div(logits, self.temperature)
        projected_output = tf.div(
            tf.exp(probabilities),
            tf.reduce_sum(tf.exp(probabilities), axis=-1))

        # Sample 1 time from the probability distribution.
        sample_ID = tf.squeeze(
            tf.multinomial(tf.expand_dims(probabilities, 0), 1))
    return sample_ID
Example #20
Source File: discretization.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def vq_nearest_neighbor(x, means, soft_em=False, num_samples=10,
                        temperature=None):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = common_layers.shape_list(means)[0]
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if soft_em:
    x_means_idx = tf.multinomial(-dist, num_samples=num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=common_layers.shape_list(means)[0])
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    if temperature is None:
      x_means_idx = tf.argmax(-dist, axis=-1)
    else:
      x_means_idx = tf.multinomial(- dist / temperature, 1)
      x_means_idx = tf.squeeze(x_means_idx, axis=-1)
    if (common_layers.should_generate_summaries() and
        not common_layers.is_xla_compiled()):
      tf.summary.histogram("means_idx", tf.reshape(x_means_idx, [-1]))
    x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
  x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
  x_means = tf.matmul(x_means_hot_flat, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss, dist
Example #21
Source File: latent_layers.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
  """Latent prediction and loss.

  Args:
    latents_pred: Tensor of shape [..., depth].
    latents_discrete_hot: Tensor of shape [..., vocab_size].
    vocab_size: an int representing the vocab size.
    hparams: tf.contrib.training.HParams.

  Returns:
    sample: Tensor of shape [...], a sample from a multinomial distribution.
    loss: Tensor of shape [...], the softmax cross-entropy.
  """
  with tf.variable_scope("latent_logits"):
    latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                     name="logits_dense")
    if hparams.logit_normalization:
      latents_logits *= tf.rsqrt(
          1e-8 + tf.reduce_mean(tf.square(latents_logits)))
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=latents_discrete_hot, logits=latents_logits)

    # TODO(trandustin): tease this out from ae_latent_softmax.
    # we use just the loss portion to anchor prior / encoder on text.
    sample = multinomial_sample(latents_logits,
                                vocab_size,
                                hparams.sampling_method,
                                hparams.sampling_temp)
    return sample, loss
Example #22
Source File: transformer_vae.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def multinomial_sample(x, vocab_size, temperature):
  """Multinomial sampling from a n-dimensional tensor."""
  if temperature > 0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return tf.to_int32(reshaped_samples)
Example #23
Source File: multinomial_op_test.py From deep_image_model with Apache License 2.0 | 5 votes |
def testSmallEntropy(self):
  tf.set_random_seed(1618)
  with self.test_session(use_gpu=self.use_gpu):
    # A logit value of -10 corresponds to a probability of ~5e-5.
    logits = tf.constant([[-10., 10., -10.], [-10., -10., 10.]])
    num_samples = 1000
    samples = tf.multinomial(logits, num_samples).eval()
    self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
Example #24
Source File: models.py From DRL_DeliveryDuel with MIT License | 5 votes |
def create_dc_actor_critic(self, h_size, num_layers):
    num_streams = 1
    hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
    hidden = hidden_streams[0]

    if self.use_recurrent:
        tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
        self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
        self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
        hidden = tf.concat([hidden, self.prev_action_oh], axis=1)

        self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
        hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
        self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

    self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

    self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
    self.output = tf.multinomial(self.policy, 1)
    self.output = tf.identity(self.output, name="action")

    self.value = tf.layers.dense(hidden, 1, activation=None)
    self.value = tf.identity(self.value, name="value_estimate")
    self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)

    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)

    self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32,
                                        name='old_probabilities')

    # We reshape these tensors to [batch x 1] in order to be of the same rank as continuous control probabilities.
    self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
    self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1)
Example #25
Source File: models.py From DRL_DeliveryDuel with MIT License | 5 votes |
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
             normalize=False, use_recurrent=False):
    LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)
    num_streams = 1
    hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
    hidden = hidden_streams[0]
    self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
    hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)

    if self.use_recurrent:
        self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
        hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
        self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

    self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

    if brain.vector_action_space_type == "discrete":
        self.action_probs = tf.nn.softmax(self.policy)
        self.sample_action_float = tf.multinomial(self.policy, 1)
        self.sample_action_float = tf.identity(self.sample_action_float, name="action")
        self.sample_action = tf.cast(self.sample_action_float, tf.int32)
        self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
        self.action_oh = tf.one_hot(self.true_action, self.a_size)
        self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
        self.action_percent = tf.reduce_mean(tf.cast(
            tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
    else:
        self.sample_action = tf.identity(self.policy, name="action")
        self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name="teacher_action")
        self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    self.update = optimizer.minimize(self.loss)
Example #26
Source File: multinomial_op_test.py From deep_image_model with Apache License 2.0 | 5 votes |
def testEmpty(self):
  classes = 5
  with self.test_session(use_gpu=self.use_gpu):
    for batch in 0, 3:
      for samples in 0, 7:
        x = tf.multinomial(tf.zeros([batch, classes]), samples).eval()
        self.assertEqual(x.shape, (batch, samples))
Example #27
Source File: multinomial_op_test.py From deep_image_model with Apache License 2.0 | 5 votes |
def testEmptyClasses(self):
  with self.test_session(use_gpu=self.use_gpu):
    x = tf.multinomial(tf.zeros([5, 0]), 7)
    with self.assertRaisesOpError("num_classes should be positive"):
      x.eval()
Example #28
Source File: controller.py From enas with Apache License 2.0 | 5 votes |
def _build_sampler(self):
  """Build the sampler ops and the log_prob ops."""

  arc_seq = []
  sample_log_probs = []
  all_h = []

  # sampler ops
  inputs = self.g_emb
  prev_c = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
            for _ in xrange(self.lstm_num_layers)]
  prev_h = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
            for _ in xrange(self.lstm_num_layers)]
  for layer_id in xrange(self.num_layers):
    for branch_id in xrange(self.num_branches):
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      all_h.append(tf.stop_gradient(next_h[-1]))

      logits = tf.matmul(next_h[-1], self.w_soft)
      if self.temperature is not None:
        logits /= self.temperature
      if self.tanh_constant is not None:
        logits = self.tanh_constant * tf.tanh(logits)

      config_id = tf.multinomial(logits, 1)
      config_id = tf.to_int32(config_id)
      config_id = tf.reshape(config_id, [1])
      arc_seq.append(config_id)
      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=config_id)
      sample_log_probs.append(log_prob)

      inputs = tf.nn.embedding_lookup(self.w_emb, config_id)

  arc_seq = tf.concat(arc_seq, axis=0)
  self.sample_arc = arc_seq

  self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
  self.ppl = tf.exp(tf.reduce_sum(self.sample_log_probs) /
                    tf.to_float(self.num_layers * self.num_branches))
  self.all_h = all_h
Example #29
Source File: utils_tf.py From incremental_detectors with BSD 3-Clause "New" or "Revised" License | 5 votes |
def tf_random_sample(sz, *args):
    s = tf.reshape((tf.shape(args[0])[0]), (1, ))
    ar = tf.expand_dims(tf.log(tf.tile([10.], s)), 0)
    sample = tf.multinomial(ar, sz)[0]
    return tuple(tf.gather(a, sample) for a in args)
Example #30
Source File: categorical.py From garage with MIT License | 5 votes |
def sample_sym(self, dist_info, name='sample_sym'):
    with tf.name_scope(name):
        probs = dist_info['prob']
        samples = tf.multinomial(
            tf.math.log(probs + 1e-8), num_samples=1)[:, 0]
        return tf.nn.embedding_lookup(
            np.eye(self.dim, dtype=np.float32), samples)