Python tensorflow.multinomial() Examples

The following are 30 code examples of tensorflow.multinomial(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions and classes of the tensorflow module.
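Before the examples, a minimal standalone sketch of the API (illustrative values, not taken from any project below): tf.multinomial takes a 2-D tensor of unnormalized log-probabilities with shape [batch_size, num_classes] and draws num_samples class indices per row. Note that tf.multinomial has since been deprecated; from TensorFlow 1.13 onward the same op is exposed as tf.random.categorical.

import tensorflow as tf  # assumes a TensorFlow 1.x graph-mode session

# Unnormalized log-probabilities (logits) for two rows over three classes.
logits = tf.constant([[1.0, 1.0, 4.0],
                      [0.0, 5.0, 0.0]])

# Draw four independent samples per row; the result has shape [2, 4],
# and each entry is a class index in {0, 1, 2}.
samples = tf.multinomial(logits, num_samples=4)

# Equivalent call in TensorFlow 1.13+:
# samples = tf.random.categorical(logits, num_samples=4)

with tf.Session() as sess:
    print(sess.run(samples))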
Example #1
Source File: goal_nav_agent.py    From streetlearn with Apache License 2.0
def _head(self, policy_input, heading, xy, target_xy):
    """Build the head of the agent: linear policy and value function, and pass
    the auxiliary outputs through.
    """

    # Linear policy and value function.
    policy_logits = snt.Linear(
        self._num_actions, name='policy_logits')(policy_input)
    baseline = tf.squeeze(snt.Linear(1, name='baseline')(policy_input), axis=-1)

    # Sample an action from the policy.
    new_action = tf.multinomial(
        policy_logits, num_samples=1, output_dtype=tf.int32)
    new_action = tf.squeeze(new_action, 1, name='new_action')

    return AgentOutput(
        new_action, policy_logits, baseline, heading, xy, target_xy) 
Example #2
Source File: transformer_nat.py    From BERT with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
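A note on the dist computation above (the same pattern recurs in Examples #7, #10, #13, and #20): dist holds the pairwise squared Euclidean distances between rows of x and rows of means, via the expansion ||x - m||^2 = ||x||^2 + ||m||^2 - 2<x, m>. A standalone NumPy sketch of that identity, with illustrative shapes:

import numpy as np

x = np.random.randn(4, 8)       # 4 input vectors of dimension 8
means = np.random.randn(16, 8)  # 16 codebook entries

x_norm_sq = (x ** 2).sum(-1, keepdims=True)           # [4, 1]
means_norm_sq = (means ** 2).sum(-1, keepdims=True)   # [16, 1]
dist = x_norm_sq + means_norm_sq.T - 2 * x @ means.T  # [4, 16]

# Agrees with the direct computation up to float rounding.
direct = ((x[:, None, :] - means[None, :, :]) ** 2).sum(-1)
assert np.allclose(dist, direct)

Negating dist turns the nearest-neighbor search into sampling: tf.multinomial(-dist, ...) draws each codebook index with probability softmax(-dist), so closer means are chosen more often, which is what the "em" branch relies on.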
Example #3
Source File: complete_scan.py    From ScanComplete with Apache License 2.0
def predict_from_model(logit_groups_geometry, logit_groups_semantics,
                       temperature):
  """Reconstruct predicted geometry and semantics from model output."""
  predictions_geometry_list = []
  for logit_group in logit_groups_geometry:
    if FLAGS.p_norm > 0:
      predictions_geometry_list.append(logit_group[:, :, :, :, 0])
    else:
      logit_group_shape = logit_group.shape_as_list()
      logit_group = tf.reshape(logit_group, [-1, logit_group_shape[-1]])
      samples = tf.multinomial(temperature * logit_group, 1)
      predictions_geometry_list.append(
          tf.reshape(samples, logit_group_shape[:-1]))
  predictions_semantics_list = []
  if FLAGS.predict_semantics:
    for logit_group in logit_groups_semantics:
      predictions_semantics_list.append(tf.argmax(logit_group, 4))
  else:
    predictions_semantics_list = [
        tf.zeros(shape=predictions_geometry_list[0].shape, dtype=tf.uint8)
    ] * len(predictions_geometry_list)
  return predictions_geometry_list, predictions_semantics_list 
Example #4
Source File: latent_layers.py    From BERT with Apache License 2.0
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples 
Example #5
Source File: pg_reinforce.py    From tensorflow-reinforce with MIT License
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
      """ simple helper function here that takes unnormalized logprobs """
      maxy = np.amax(y)
      e = np.exp(y - maxy)
      return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
      return random.randint(0, self.num_actions-1)
    else:
      action_scores = self.session.run(self.action_scores, {self.states: states})[0]
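      # Subtracting a small epsilon keeps the probabilities summing to just
      # under 1; np.random.multinomial raises a ValueError when floating-point
      # rounding pushes the sum above 1.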
      action_probs  = softmax(action_scores) - 1e-5
      action = np.argmax(np.random.multinomial(1, action_probs))
      return action 
Example #6
Source File: base_controller.py    From EAS with MIT License
def build_forward(self, _input):
		output = _input  # [batch_size, num_steps, rnn_units]
		feature_dim = int(output.get_shape()[2])  # rnn_units
		output = tf.reshape(output, [-1, feature_dim])  # [batch_size * num_steps, rnn_units]
		final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax'
		if self.net_type == 'simple':
			net_config = [] if self.net_config is None else self.net_config
			with tf.variable_scope('wider_actor'):
				for layer in net_config:
					units, activation = layer.get('units'), layer.get('activation', 'relu')
					output = BasicModel.fc_layer(output, units, use_bias=True)
					output = BasicModel.activation(output, activation)
				logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True)  # [batch_size * num_steps, out_dim]
			probs = BasicModel.activation(logits, final_activation)  # [batch_size * num_steps, out_dim]
			probs_dim = self.out_dim
			if self.out_dim == 1:
				probs = tf.concat([1 - probs, probs], axis=1)
				probs_dim = 2
				
			self.decision = tf.multinomial(tf.log(probs), 1)  # [batch_size * num_steps, 1]
			self.decision = tf.reshape(self.decision, [-1, self.num_steps])  # [batch_size, num_steps]
			self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
		else:
			raise ValueError('Do not support %s' % self.net_type) 
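Since tf.multinomial expects unnormalized log-probabilities, the softmax output above is passed through tf.log before sampling; the same probabilities-to-logits round trip appears in Examples #16 and #30.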
Example #7
Source File: transformer_nat.py    From training_results_v0.5 with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #8
Source File: common_layers.py    From training_results_v0.5 with Apache License 2.0
def sample_with_temperature(logits, temperature):
  """Either argmax or random sampling.

  Args:
    logits: a Tensor.
    temperature: a float  0.0=argmax 1.0=random

  Returns:
    a Tensor with one fewer dimension than logits.
  """
  if temperature == 0.0:
    # TF argmax doesn't handle >5 dimensions, so we reshape here.
    logits_shape = shape_list(logits)
    argmax = tf.argmax(tf.reshape(logits, [-1, logits_shape[-1]]), axis=1)
    return tf.reshape(argmax, logits_shape[:-1])
  else:
    assert temperature > 0.0
    reshaped_logits = (
        tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         shape_list(logits)[:logits.get_shape().ndims - 1])
    return choices 
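Dividing the logits by the temperature before tf.multinomial interpolates between greedy and uniform behavior: temperatures below 1.0 sharpen the implied softmax toward the argmax, temperatures above 1.0 flatten it toward uniform, and temperature == 0.0 is handled separately with tf.argmax because the division would be undefined.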
Example #9
Source File: latent_layers.py    From training_results_v0.5 with Apache License 2.0
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples 
Example #10
Source File: discretization.py    From fine-lm with MIT License
def vq_nearest_neighbor(x, means, soft_em=False, num_samples=10):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = common_layers.shape_list(means)[0]
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if soft_em:
    x_means_idx = tf.multinomial(-dist, num_samples=num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=common_layers.shape_list(means)[0])
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
  x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
  x_means = tf.matmul(x_means_hot_flat, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #11
Source File: graph_factory.py    From neural-symbolic-machines with Apache License 2.0
def create_softmax_from_logits(logits):
  "Create nodes for softmax computation from logits."
  temperature = tf.placeholder_with_default(
    1.0, shape=(), name='temperature')
  logits = logits / temperature

  logits_shape = tf.shape(logits)
  logits_dim = logits_shape[-1]
  logits_2d = tf.reshape(logits, [-1, logits_dim])
  samples = tf.multinomial(logits_2d, 1)
  samples = tf.reshape(samples, logits_shape[:-1])

  probs = tf.nn.softmax(logits)
  predictions = tf.argmax(probs, axis=2)
    
  return logits, probs, predictions, samples, temperature


Example #12
Source File: latent_layers.py    From fine-lm with MIT License
def multinomial_sample(x, vocab_size, sampling_method, temperature):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  if sampling_method == "random":
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples 
Example #13
Source File: transformer_nat.py    From fine-lm with MIT License
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #14
Source File: pg_actor_critic.py    From Codes-for-RL-PER with MIT License
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
      """ simple helper function here that takes unnormalized logprobs """
      maxy = np.amax(y)
      e = np.exp(y - maxy)
      return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
      return random.randint(0, self.num_actions-1)
    else:
      action_scores = self.session.run(self.action_scores, {self.states: states})[0]
      action_probs  = softmax(action_scores) - 1e-5
      action = np.argmax(np.random.multinomial(1, action_probs))
      return action 
Example #15
Source File: pg_actor_critic.py    From tensorflow-reinforce with MIT License
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
      """ simple helper function here that takes unnormalized logprobs """
      maxy = np.amax(y)
      e = np.exp(y - maxy)
      return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
      return random.randint(0, self.num_actions-1)
    else:
      action_scores = self.session.run(self.action_scores, {self.states: states})[0]
      action_probs  = softmax(action_scores) - 1e-5
      action = np.argmax(np.random.multinomial(1, action_probs))
      return action 
Example #16
Source File: policy_net.py    From gail_ppo_tf with MIT License
def __init__(self, name: str, env):
        """
        :param name: string
        :param env: gym env
        """

        ob_space = env.observation_space
        act_space = env.action_space

        with tf.variable_scope(name):
            self.obs = tf.placeholder(dtype=tf.float32, shape=[None] + list(ob_space.shape), name='obs')

            with tf.variable_scope('policy_net'):
                layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
                layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
                layer_3 = tf.layers.dense(inputs=layer_2, units=act_space.n, activation=tf.tanh)
                self.act_probs = tf.layers.dense(inputs=layer_3, units=act_space.n, activation=tf.nn.softmax)

            with tf.variable_scope('value_net'):
                layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
                layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
                self.v_preds = tf.layers.dense(inputs=layer_2, units=1, activation=None)

            self.act_stochastic = tf.multinomial(tf.log(self.act_probs), num_samples=1)
            self.act_stochastic = tf.reshape(self.act_stochastic, shape=[-1])

            self.act_deterministic = tf.argmax(self.act_probs, axis=1)

            self.scope = tf.get_variable_scope().name 
Example #17
Source File: common_layers.py    From BERT with Apache License 2.0
def sample_with_temperature(logits, temperature, sampling_keep_top_k=-1):
  """Either argmax or random sampling.

  Args:
    logits: a Tensor.
    temperature: a float  0.0=argmax 1.0=random
    sampling_keep_top_k: If not -1, only sample from the top k logits.
  Returns:
    a Tensor with one fewer dimension than logits.
  """
  if temperature == 0.0:
    # TF argmax doesn't handle >5 dimensions, so we reshape here.
    logits_shape = shape_list(logits)
    argmax = tf.argmax(tf.reshape(logits, [-1, logits_shape[-1]]), axis=1)
    return tf.reshape(argmax, logits_shape[:-1])
  else:
    assert temperature > 0.0

    if sampling_keep_top_k != -1:
      if sampling_keep_top_k <= 0:
        raise ValueError("sampling_keep_top_k must either be -1 or positive.")

      vocab_size = shape_list(logits)[1]

      k_largest = tf.contrib.nn.nth_element(
          logits, n=sampling_keep_top_k, reverse=True)
      k_largest = tf.tile(tf.reshape(k_largest, [-1, 1]), [1, vocab_size])

      # Force every position that is not in the top k to have probability near
      # 0 by setting the logit to be very negative.
      logits = tf.where(tf.less_equal(logits, k_largest),
                        tf.ones_like(logits)*-1e6, logits)

    reshaped_logits = (
        tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         shape_list(logits)[:logits.get_shape().ndims - 1])
    return choices 
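The sampling_keep_top_k branch restricts sampling to the k highest-scoring classes: tf.contrib.nn.nth_element finds a per-row cutoff (its n argument is zero-indexed, so n = k with reverse=True selects the (k+1)-th largest logit), and tf.where pushes every logit at or below that cutoff down to -1e6, leaving the excluded classes with effectively zero probability after normalization.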
Example #18
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testLargeLogits(self):
    for neg in [True, False]:
      with self.test_session(use_gpu=self.use_gpu):
        logits = np.array([[1000.] * 5])
        if neg:
          logits *= -1
        samples = tf.multinomial(logits, 10).eval()
      # Sampled classes should be in-range.
      self.assertTrue((samples >= 0).all())
      self.assertTrue((samples < 5).all()) 
Example #19
Source File: decoders.py    From DeepChatModels with MIT License
def sample(self, projected_output):
        """Return integer ID tensor representing the sampled word.
        
        Args:
            projected_output: Tensor [1, 1, state_size], representing a single
                decoder timestep output. 
        """
        # TODO: We really need a tf.control_dependencies check here (for rank).
        with tf.name_scope('decoder_sampler', values=[projected_output]):

            # Protect against extra size-1 dimensions; grab the 1D tensor
            # of size state_size.
            logits = tf.squeeze(projected_output)
            if self.temperature < 0.02:
                return tf.argmax(logits, axis=0)

            # Scale logits by temperature. Note that tf.multinomial consumes
            # logits directly, so these temperature-scaled logits (not the
            # normalized probabilities computed next) are what get sampled.
            probabilities = tf.div(logits, self.temperature)
            projected_output = tf.div(
                tf.exp(probabilities),
                tf.reduce_sum(tf.exp(probabilities), axis=-1))

            # Sample 1 time from the probability distribution.
            sample_ID = tf.squeeze(
                tf.multinomial(tf.expand_dims(probabilities, 0), 1))
        return sample_ID 
Example #20
Source File: discretization.py    From training_results_v0.5 with Apache License 2.0
def vq_nearest_neighbor(x, means,
                        soft_em=False, num_samples=10, temperature=None):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = common_layers.shape_list(means)[0]
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if soft_em:
    x_means_idx = tf.multinomial(-dist, num_samples=num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=common_layers.shape_list(means)[0])
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    if temperature is None:
      x_means_idx = tf.argmax(-dist, axis=-1)
    else:
      x_means_idx = tf.multinomial(- dist / temperature, 1)
      x_means_idx = tf.squeeze(x_means_idx, axis=-1)
    if (common_layers.should_generate_summaries() and
        not common_layers.is_xla_compiled()):
      tf.summary.histogram("means_idx", tf.reshape(x_means_idx, [-1]))
    x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
  x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
  x_means = tf.matmul(x_means_hot_flat, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss, dist 
Example #21
Source File: latent_layers.py    From training_results_v0.5 with Apache License 2.0
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
  """Latent prediction and loss.

  Args:
    latents_pred: Tensor of shape [..., depth].
    latents_discrete_hot: Tensor of shape [..., vocab_size].
    vocab_size: an int representing the vocab size.
    hparams: tf.contrib.training.HParams.

  Returns:
    sample: Tensor of shape [...], a sample from a multinomial distribution.
    loss: Tensor of shape [...], the softmax cross-entropy.
  """
  with tf.variable_scope("latent_logits"):
    latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                     name="logits_dense")
    if hparams.logit_normalization:
      latents_logits *= tf.rsqrt(1e-8 +
                                 tf.reduce_mean(tf.square(latents_logits)))
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=latents_discrete_hot, logits=latents_logits)

    # TODO(trandustin): tease this out from ae_latent_softmax.
    # we use just the loss portion to anchor prior / encoder on text.
    sample = multinomial_sample(latents_logits,
                                vocab_size,
                                hparams.sampling_method,
                                hparams.sampling_temp)
    return sample, loss 
Example #22
Source File: transformer_vae.py    From training_results_v0.5 with Apache License 2.0
def multinomial_sample(x, vocab_size, temperature):
  """Multinomial sampling from a n-dimensional tensor."""
  if temperature > 0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return tf.to_int32(reshaped_samples) 
Example #23
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testSmallEntropy(self):
    tf.set_random_seed(1618)
    with self.test_session(use_gpu=self.use_gpu):
      # A logit value of -10 corresponds to a probability of ~5e-5.
      logits = tf.constant([[-10., 10., -10.], [-10., -10., 10.]])
      num_samples = 1000
      samples = tf.multinomial(logits, num_samples).eval()
      self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples) 
Example #24
Source File: models.py    From DRL_DeliveryDuel with MIT License
def create_dc_actor_critic(self, h_size, num_layers):
        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
        hidden = hidden_streams[0]

        if self.use_recurrent:
            tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
            self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
            self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
            hidden = tf.concat([hidden, self.prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

        self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
        self.output = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.output, name="action")

        self.value = tf.layers.dense(hidden, 1, activation=None)
        self.value = tf.identity(self.value, name="value_estimate")
        self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)
        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)

        self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name='old_probabilities')

        # We reshape these tensors to [batch x 1] in order to be of the same rank as continuous control probabilities.
        self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
        self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1) 
Example #25
Source File: models.py    From DRL_DeliveryDuel with MIT License
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)

        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
        self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        if brain.vector_action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
            self.sample_action_float = tf.multinomial(self.policy, 1)
            self.sample_action_float = tf.identity(self.sample_action_float, name="action")
            self.sample_action = tf.cast(self.sample_action_float, tf.int32)
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
            self.action_percent = tf.reduce_mean(tf.cast(
                tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
        else:
            self.sample_action = tf.identity(self.policy, name="action")
            self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name="teacher_action")
            self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss) 
Example #26
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testEmpty(self):
    classes = 5
    with self.test_session(use_gpu=self.use_gpu):
      for batch in 0, 3:
        for samples in 0, 7:
          x = tf.multinomial(tf.zeros([batch, classes]), samples).eval()
          self.assertEqual(x.shape, (batch, samples)) 
Example #27
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testEmptyClasses(self):
    with self.test_session(use_gpu=self.use_gpu):
      x = tf.multinomial(tf.zeros([5, 0]), 7)
      with self.assertRaisesOpError("num_classes should be positive"):
        x.eval() 
Example #28
Source File: controller.py    From enas with Apache License 2.0
def _build_sampler(self):
    """Build the sampler ops and the log_prob ops."""

    arc_seq = []
    sample_log_probs = []
    all_h = []

    # sampler ops
    inputs = self.g_emb
    prev_c = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
              for _ in xrange(self.lstm_num_layers)]
    prev_h = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
              for _ in xrange(self.lstm_num_layers)]
    for layer_id in xrange(self.num_layers):
      for branch_id in xrange(self.num_branches):
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        all_h.append(tf.stop_gradient(next_h[-1]))

        logits = tf.matmul(next_h[-1], self.w_soft)
        if self.temperature is not None:
          logits /= self.temperature
        if self.tanh_constant is not None:
          logits = self.tanh_constant * tf.tanh(logits)

        config_id = tf.multinomial(logits, 1)
        config_id = tf.to_int32(config_id)
        config_id = tf.reshape(config_id, [1])
        arc_seq.append(config_id)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=config_id)
        sample_log_probs.append(log_prob)

        inputs = tf.nn.embedding_lookup(self.w_emb, config_id)
    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = arc_seq

    self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
    self.ppl = tf.exp(tf.reduce_sum(self.sample_log_probs) /
                      tf.to_float(self.num_layers * self.num_branches))
    self.all_h = all_h 
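A note on the log-probabilities above: tf.nn.sparse_softmax_cross_entropy_with_logits returns the negative log-probability of config_id under softmax(logits), so sample_log_probs accumulates negative log-likelihoods and self.ppl exponentiates their mean, i.e. the controller's perplexity over its own sampled architecture.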
Example #29
Source File: utils_tf.py    From incremental_detectors with BSD 3-Clause "New" or "Revised" License
def tf_random_sample(sz, *args):
    s = tf.reshape((tf.shape(args[0])[0]), (1, ))
    ar = tf.expand_dims(tf.log(tf.tile([10.], s)), 0)
    sample = tf.multinomial(ar, sz)[0]
    return tuple(tf.gather(a, sample) for a in args) 
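Because every entry of ar is the same constant (log 10), tf.multinomial here draws sz indices uniformly at random, with replacement, from [0, batch_size); tf.gather then selects the corresponding rows from each tensor in args, yielding a common random subsample.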
Example #30
Source File: categorical.py    From garage with MIT License
def sample_sym(self, dist_info, name='sample_sym'):
        with tf.name_scope(name):
            probs = dist_info['prob']
            samples = tf.multinomial(tf.math.log(probs + 1e-8),
                                     num_samples=1)[:, 0]

            return tf.nn.embedding_lookup(np.eye(self.dim, dtype=np.float32),
                                          samples)