Python tensorflow.multinomial() Examples

The following are 30 code examples of tensorflow.multinomial(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions and classes of the tensorflow module.
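Before the examples, a minimal standalone sketch of the API (illustrative values, not taken from any project below): tf.multinomial takes a 2-D tensor of unnormalized log-probabilities with shape [batch_size, num_classes] and draws num_samples class indices per row. Note that tf.multinomial has since been deprecated; from TensorFlow 1.13 onward the same op is exposed as tf.random.categorical.

import tensorflow as tf  # assumes a TensorFlow 1.x graph-mode session

# Unnormalized log-probabilities (logits) for two rows over three classes.
logits = tf.constant([[1.0, 1.0, 4.0],
                      [0.0, 5.0, 0.0]])

# Draw four independent samples per row; the result has shape [2, 4],
# and each entry is a class index in {0, 1, 2}.
samples = tf.multinomial(logits, num_samples=4)

# Equivalent call in TensorFlow 1.13+:
# samples = tf.random.categorical(logits, num_samples=4)

with tf.Session() as sess:
    print(sess.run(samples))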
Example #1
Source File: goal_nav_agent.py    From streetlearn with Apache License 2.0
def _head(self, policy_input, heading, xy, target_xy):
    """Build the head of the agent: linear policy and value function, and pass
    the auxiliary outputs through.
    """

    # Linear policy and value function.
    policy_logits = snt.Linear(
        self._num_actions, name='policy_logits')(policy_input)
    baseline = tf.squeeze(snt.Linear(1, name='baseline')(policy_input), axis=-1)

    # Sample an action from the policy.
    new_action = tf.multinomial(
        policy_logits, num_samples=1, output_dtype=tf.int32)
    new_action = tf.squeeze(new_action, 1, name='new_action')

    return AgentOutput(
        new_action, policy_logits, baseline, heading, xy, target_xy) 
Example #2
Source File: transformer_nat.py    From BERT with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
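A note on the dist computation above (the same pattern recurs in Examples #7, #10, #13, and #20): dist holds the pairwise squared Euclidean distances between rows of x and rows of means, via the expansion ||x - m||^2 = ||x||^2 + ||m||^2 - 2<x, m>. A standalone NumPy sketch of that identity, with illustrative shapes:

import numpy as np

x = np.random.randn(4, 8)       # 4 input vectors of dimension 8
means = np.random.randn(16, 8)  # 16 codebook entries

x_norm_sq = (x ** 2).sum(-1, keepdims=True)           # [4, 1]
means_norm_sq = (means ** 2).sum(-1, keepdims=True)   # [16, 1]
dist = x_norm_sq + means_norm_sq.T - 2 * x @ means.T  # [4, 16]

# Agrees with the direct computation up to float rounding.
direct = ((x[:, None, :] - means[None, :, :]) ** 2).sum(-1)
assert np.allclose(dist, direct)

Negating dist turns the nearest-neighbor search into sampling: tf.multinomial(-dist, ...) draws each codebook index with probability softmax(-dist), so closer means are chosen more often, which is what the "em" branch relies on.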
Example #3
Source File: complete_scan.py    From ScanComplete with Apache License 2.0
def predict_from_model(logit_groups_geometry, logit_groups_semantics,
                       temperature):
  """Reconstruct predicted geometry and semantics from model output."""
  predictions_geometry_list = []
  for logit_group in logit_groups_geometry:
    if FLAGS.p_norm > 0:
      predictions_geometry_list.append(logit_group[:, :, :, :, 0])
    else:
      logit_group_shape = logit_group.shape_as_list()
      logit_group = tf.reshape(logit_group, [-1, logit_group_shape[-1]])
      samples = tf.multinomial(temperature * logit_group, 1)
      predictions_geometry_list.append(
          tf.reshape(samples, logit_group_shape[:-1]))
  predictions_semantics_list = []
  if FLAGS.predict_semantics:
    for logit_group in logit_groups_semantics:
      predictions_semantics_list.append(tf.argmax(logit_group, 4))
  else:
    predictions_semantics_list = [
        tf.zeros(shape=predictions_geometry_list[0].shape, dtype=tf.uint8)
    ] * len(predictions_geometry_list)
  return predictions_geometry_list, predictions_semantics_list 
Example #4
Source File: latent_layers.py    From BERT with Apache License 2.0
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples 
Example #5
Source File: pg_reinforce.py    From tensorflow-reinforce with MIT License
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
      """ simple helper function here that takes unnormalized logprobs """
      maxy = np.amax(y)
      e = np.exp(y - maxy)
      return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
      return random.randint(0, self.num_actions-1)
    else:
      action_scores = self.session.run(self.action_scores, {self.states: states})[0]
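      # Subtracting a small epsilon keeps the probabilities summing to just
      # under 1; np.random.multinomial raises a ValueError when floating-point
      # rounding pushes the sum above 1.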
      action_probs  = softmax(action_scores) - 1e-5
      action = np.argmax(np.random.multinomial(1, action_probs))
      return action 
Example #6
Source File: base_controller.py    From EAS with MIT License
def build_forward(self, _input):
		output = _input  # [batch_size, num_steps, rnn_units]
		feature_dim = int(output.get_shape()[2])  # rnn_units
		output = tf.reshape(output, [-1, feature_dim])  # [batch_size * num_steps, rnn_units]
		final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax'
		if self.net_type == 'simple':
			net_config = [] if self.net_config is None else self.net_config
			with tf.variable_scope('wider_actor'):
				for layer in net_config:
					units, activation = layer.get('units'), layer.get('activation', 'relu')
					output = BasicModel.fc_layer(output, units, use_bias=True)
					output = BasicModel.activation(output, activation)
				logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True)  # [batch_size * num_steps, out_dim]
			probs = BasicModel.activation(logits, final_activation)  # [batch_size * num_steps, out_dim]
			probs_dim = self.out_dim
			if self.out_dim == 1:
				probs = tf.concat([1 - probs, probs], axis=1)
				probs_dim = 2
				
			self.decision = tf.multinomial(tf.log(probs), 1)  # [batch_size * num_steps, 1]
			self.decision = tf.reshape(self.decision, [-1, self.num_steps])  # [batch_size, num_steps]
			self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
		else:
			raise ValueError('Do not support %s' % self.net_type) 
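Since tf.multinomial expects unnormalized log-probabilities, the softmax output above is passed through tf.log before sampling; the same probabilities-to-logits round trip appears in Examples #16 and #30.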
Example #7
Source File: transformer_nat.py    From training_results_v0.5 with Apache License 2.0
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #8
Source File: common_layers.py    From training_results_v0.5 with Apache License 2.0
def sample_with_temperature(logits, temperature):
  """Either argmax or random sampling.

  Args:
    logits: a Tensor.
    temperature: a float  0.0=argmax 1.0=random

  Returns:
    a Tensor with one fewer dimension than logits.
  """
  if temperature == 0.0:
    # TF argmax doesn't handle >5 dimensions, so we reshape here.
    logits_shape = shape_list(logits)
    argmax = tf.argmax(tf.reshape(logits, [-1, logits_shape[-1]]), axis=1)
    return tf.reshape(argmax, logits_shape[:-1])
  else:
    assert temperature > 0.0
    reshaped_logits = (
        tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         shape_list(logits)[:logits.get_shape().ndims - 1])
    return choices 
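Dividing the logits by the temperature before tf.multinomial interpolates between greedy and uniform behavior: temperatures below 1.0 sharpen the implied softmax toward the argmax, temperatures above 1.0 flatten it toward uniform, and temperature == 0.0 is handled separately with tf.argmax because the division would be undefined.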
Example #9
Source File: latent_layers.py    From training_results_v0.5 with Apache License 2.0
def multinomial_sample(x, vocab_size=None, sampling_method="random",
                       temperature=1.0):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  vocab_size = vocab_size or common_layers.shape_list(x)[-1]
  if sampling_method == "random" and temperature > 0.0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples 
Example #10
Source File: discretization.py    From fine-lm with MIT License
def vq_nearest_neighbor(x, means, soft_em=False, num_samples=10):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = common_layers.shape_list(means)[0]
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if soft_em:
    x_means_idx = tf.multinomial(-dist, num_samples=num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=common_layers.shape_list(means)[0])
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
  x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
  x_means = tf.matmul(x_means_hot_flat, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #11
Source File: graph_factory.py    From neural-symbolic-machines with Apache License 2.0
def create_softmax_from_logits(logits):
  "Create nodes for softmax computation from logits."
  temperature = tf.placeholder_with_default(
    1.0, shape=(), name='temperature')
  logits = logits / temperature

  logits_shape = tf.shape(logits)
  logits_dim = logits_shape[-1]
  logits_2d = tf.reshape(logits, [-1, logits_dim])
  samples = tf.multinomial(logits_2d, 1)
  samples = tf.reshape(samples, logits_shape[:-1])

  probs = tf.nn.softmax(logits)
  predictions = tf.argmax(probs, axis=2)
    
  return logits, probs, predictions, samples, temperature


Example #12
Source File: latent_layers.py    From fine-lm with MIT License
def multinomial_sample(x, vocab_size, sampling_method, temperature):
  """Multinomial sampling from a n-dimensional tensor.

  Args:
    x: Tensor of shape [..., vocab_size]. Parameterizes logits of multinomial.
    vocab_size: Number of classes in multinomial distribution.
    sampling_method: String, "random" or otherwise deterministic.
    temperature: Positive float.

  Returns:
    Tensor of shape [...].
  """
  if sampling_method == "random":
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return reshaped_samples 
Example #13
Source File: transformer_nat.py    From fine-lm with MIT License
def vq_nearest_neighbor(x, hparams):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = 2**hparams.bottleneck_bits
  means = hparams.means
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if hparams.bottleneck_kind == "em":
    x_means_idx = tf.multinomial(-dist, num_samples=hparams.num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=bottleneck_size)
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    x_means_idx = tf.argmax(-dist, axis=-1)
    x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
  x_means = tf.matmul(x_means_hot, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss 
Example #14
Source File: pg_actor_critic.py    From Codes-for-RL-PER with MIT License
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
      """ simple helper function here that takes unnormalized logprobs """
      maxy = np.amax(y)
      e = np.exp(y - maxy)
      return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
      return random.randint(0, self.num_actions-1)
    else:
      action_scores = self.session.run(self.action_scores, {self.states: states})[0]
      action_probs  = softmax(action_scores) - 1e-5
      action = np.argmax(np.random.multinomial(1, action_probs))
      return action 
Example #15
Source File: pg_actor_critic.py    From tensorflow-reinforce with MIT License
def sampleAction(self, states):
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]

    # temporary workaround
    def softmax(y):
      """ simple helper function here that takes unnormalized logprobs """
      maxy = np.amax(y)
      e = np.exp(y - maxy)
      return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
      return random.randint(0, self.num_actions-1)
    else:
      action_scores = self.session.run(self.action_scores, {self.states: states})[0]
      action_probs  = softmax(action_scores) - 1e-5
      action = np.argmax(np.random.multinomial(1, action_probs))
      return action 
Example #16
Source File: policy_net.py    From gail_ppo_tf with MIT License
def __init__(self, name: str, env):
        """
        :param name: string
        :param env: gym env
        """

        ob_space = env.observation_space
        act_space = env.action_space

        with tf.variable_scope(name):
            self.obs = tf.placeholder(dtype=tf.float32, shape=[None] + list(ob_space.shape), name='obs')

            with tf.variable_scope('policy_net'):
                layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
                layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
                layer_3 = tf.layers.dense(inputs=layer_2, units=act_space.n, activation=tf.tanh)
                self.act_probs = tf.layers.dense(inputs=layer_3, units=act_space.n, activation=tf.nn.softmax)

            with tf.variable_scope('value_net'):
                layer_1 = tf.layers.dense(inputs=self.obs, units=20, activation=tf.tanh)
                layer_2 = tf.layers.dense(inputs=layer_1, units=20, activation=tf.tanh)
                self.v_preds = tf.layers.dense(inputs=layer_2, units=1, activation=None)

            self.act_stochastic = tf.multinomial(tf.log(self.act_probs), num_samples=1)
            self.act_stochastic = tf.reshape(self.act_stochastic, shape=[-1])

            self.act_deterministic = tf.argmax(self.act_probs, axis=1)

            self.scope = tf.get_variable_scope().name 
Example #17
Source File: common_layers.py    From BERT with Apache License 2.0
def sample_with_temperature(logits, temperature, sampling_keep_top_k=-1):
  """Either argmax or random sampling.

  Args:
    logits: a Tensor.
    temperature: a float  0.0=argmax 1.0=random
    sampling_keep_top_k: If not -1, only sample from the top k logits.
  Returns:
    a Tensor with one fewer dimension than logits.
  """
  if temperature == 0.0:
    # TF argmax doesn't handle >5 dimensions, so we reshape here.
    logits_shape = shape_list(logits)
    argmax = tf.argmax(tf.reshape(logits, [-1, logits_shape[-1]]), axis=1)
    return tf.reshape(argmax, logits_shape[:-1])
  else:
    assert temperature > 0.0

    if sampling_keep_top_k != -1:
      if sampling_keep_top_k <= 0:
        raise ValueError("sampling_keep_top_k must either be -1 or positive.")

      vocab_size = shape_list(logits)[1]

      k_largest = tf.contrib.nn.nth_element(
          logits, n=sampling_keep_top_k, reverse=True)
      k_largest = tf.tile(tf.reshape(k_largest, [-1, 1]), [1, vocab_size])

      # Force every position that is not in the top k to have probability near
      # 0 by setting the logit to be very negative.
      logits = tf.where(tf.less_equal(logits, k_largest),
                        tf.ones_like(logits)*-1e6, logits)

    reshaped_logits = (
        tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature)
    choices = tf.multinomial(reshaped_logits, 1)
    choices = tf.reshape(choices,
                         shape_list(logits)[:logits.get_shape().ndims - 1])
    return choices 
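The sampling_keep_top_k branch restricts sampling to the k highest-scoring classes: tf.contrib.nn.nth_element finds a per-row cutoff (its n argument is zero-indexed, so n = k with reverse=True selects the (k+1)-th largest logit), and tf.where pushes every logit at or below that cutoff down to -1e6, leaving the excluded classes with effectively zero probability after normalization.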
Example #18
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testLargeLogits(self):
    for neg in [True, False]:
      with self.test_session(use_gpu=self.use_gpu):
        logits = np.array([[1000.] * 5])
        if neg:
          logits *= -1
        samples = tf.multinomial(logits, 10).eval()
      # Sampled classes should be in-range.
      self.assertTrue((samples >= 0).all())
      self.assertTrue((samples < 5).all()) 
Example #19
Source File: decoders.py    From DeepChatModels with MIT License
def sample(self, projected_output):
        """Return integer ID tensor representing the sampled word.
        
        Args:
            projected_output: Tensor [1, 1, state_size], representing a single
                decoder timestep output. 
        """
        # TODO: We really need a tf.control_dependencies check here (for rank).
        with tf.name_scope('decoder_sampler', values=[projected_output]):

            # Protect against extra size-1 dimensions; grab the 1D tensor
            # of size state_size.
            logits = tf.squeeze(projected_output)
            if self.temperature < 0.02:
                return tf.argmax(logits, axis=0)

            # Scale logits by temperature. Note that tf.multinomial consumes
            # logits directly, so these temperature-scaled logits (not the
            # normalized probabilities computed next) are what get sampled.
            probabilities = tf.div(logits, self.temperature)
            projected_output = tf.div(
                tf.exp(probabilities),
                tf.reduce_sum(tf.exp(probabilities), axis=-1))

            # Sample 1 time from the probability distribution.
            sample_ID = tf.squeeze(
                tf.multinomial(tf.expand_dims(probabilities, 0), 1))
        return sample_ID 
Example #20
Source File: discretization.py    From training_results_v0.5 with Apache License 2.0
def vq_nearest_neighbor(x, means,
                        soft_em=False, num_samples=10, temperature=None):
  """Find the nearest element in means to elements in x."""
  bottleneck_size = common_layers.shape_list(means)[0]
  x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keepdims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keepdims=True)
  scalar_prod = tf.matmul(x, means, transpose_b=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * scalar_prod
  if soft_em:
    x_means_idx = tf.multinomial(-dist, num_samples=num_samples)
    x_means_hot = tf.one_hot(
        x_means_idx, depth=common_layers.shape_list(means)[0])
    x_means_hot = tf.reduce_mean(x_means_hot, axis=1)
  else:
    if temperature is None:
      x_means_idx = tf.argmax(-dist, axis=-1)
    else:
      x_means_idx = tf.multinomial(- dist / temperature, 1)
      x_means_idx = tf.squeeze(x_means_idx, axis=-1)
    if (common_layers.should_generate_summaries() and
        not common_layers.is_xla_compiled()):
      tf.summary.histogram("means_idx", tf.reshape(x_means_idx, [-1]))
    x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
  x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
  x_means = tf.matmul(x_means_hot_flat, means)
  e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
  return x_means_hot, e_loss, dist 
Example #21
Source File: latent_layers.py    From training_results_v0.5 with Apache License 2.0
def ae_latent_softmax(latents_pred, latents_discrete_hot, vocab_size, hparams):
  """Latent prediction and loss.

  Args:
    latents_pred: Tensor of shape [..., depth].
    latents_discrete_hot: Tensor of shape [..., vocab_size].
    vocab_size: an int representing the vocab size.
    hparams: tf.contrib.training.HParams.

  Returns:
    sample: Tensor of shape [...], a sample from a multinomial distribution.
    loss: Tensor of shape [...], the softmax cross-entropy.
  """
  with tf.variable_scope("latent_logits"):
    latents_logits = tf.layers.dense(latents_pred, vocab_size,
                                     name="logits_dense")
    if hparams.logit_normalization:
      latents_logits *= tf.rsqrt(1e-8 +
                                 tf.reduce_mean(tf.square(latents_logits)))
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=latents_discrete_hot, logits=latents_logits)

    # TODO(trandustin): tease this out from ae_latent_softmax.
    # we use just the loss portion to anchor prior / encoder on text.
    sample = multinomial_sample(latents_logits,
                                vocab_size,
                                hparams.sampling_method,
                                hparams.sampling_temp)
    return sample, loss 
Example #22
Source File: transformer_vae.py    From training_results_v0.5 with Apache License 2.0
def multinomial_sample(x, vocab_size, temperature):
  """Multinomial sampling from a n-dimensional tensor."""
  if temperature > 0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return tf.to_int32(reshaped_samples) 
Example #23
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testSmallEntropy(self):
    tf.set_random_seed(1618)
    with self.test_session(use_gpu=self.use_gpu):
      # A logit value of -10 corresponds to a probability of ~5e-5.
      logits = tf.constant([[-10., 10., -10.], [-10., -10., 10.]])
      num_samples = 1000
      samples = tf.multinomial(logits, num_samples).eval()
      self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples) 
Example #24
Source File: models.py    From DRL_DeliveryDuel with MIT License
def create_dc_actor_critic(self, h_size, num_layers):
        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
        hidden = hidden_streams[0]

        if self.use_recurrent:
            tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
            self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
            self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
            hidden = tf.concat([hidden, self.prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

        self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
        self.output = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.output, name="action")

        self.value = tf.layers.dense(hidden, 1, activation=None)
        self.value = tf.identity(self.value, name="value_estimate")
        self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)
        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)

        self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name='old_probabilities')

        # We reshape these tensors to [batch x 1] in order to be of the same rank as continuous control probabilities.
        self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
        self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1) 
Example #25
Source File: models.py    From DRL_DeliveryDuel with MIT License
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)

        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
        self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        if brain.vector_action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
            self.sample_action_float = tf.multinomial(self.policy, 1)
            self.sample_action_float = tf.identity(self.sample_action_float, name="action")
            self.sample_action = tf.cast(self.sample_action_float, tf.int32)
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
            self.action_percent = tf.reduce_mean(tf.cast(
                tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
        else:
            self.sample_action = tf.identity(self.policy, name="action")
            self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name="teacher_action")
            self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss) 
Example #26
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testEmpty(self):
    classes = 5
    with self.test_session(use_gpu=self.use_gpu):
      for batch in 0, 3:
        for samples in 0, 7:
          x = tf.multinomial(tf.zeros([batch, classes]), samples).eval()
          self.assertEqual(x.shape, (batch, samples)) 
Example #27
Source File: multinomial_op_test.py    From deep_image_model with Apache License 2.0
def testEmptyClasses(self):
    with self.test_session(use_gpu=self.use_gpu):
      x = tf.multinomial(tf.zeros([5, 0]), 7)
      with self.assertRaisesOpError("num_classes should be positive"):
        x.eval() 
Example #28
Source File: controller.py    From enas with Apache License 2.0
def _build_sampler(self):
    """Build the sampler ops and the log_prob ops."""

    arc_seq = []
    sample_log_probs = []
    all_h = []

    # sampler ops
    inputs = self.g_emb
    prev_c = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
              for _ in xrange(self.lstm_num_layers)]
    prev_h = [tf.zeros([1, self.lstm_size], dtype=tf.float32)
              for _ in xrange(self.lstm_num_layers)]
    for layer_id in xrange(self.num_layers):
      for branch_id in xrange(self.num_branches):
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        all_h.append(tf.stop_gradient(next_h[-1]))

        logits = tf.matmul(next_h[-1], self.w_soft)
        if self.temperature is not None:
          logits /= self.temperature
        if self.tanh_constant is not None:
          logits = self.tanh_constant * tf.tanh(logits)

        config_id = tf.multinomial(logits, 1)
        config_id = tf.to_int32(config_id)
        config_id = tf.reshape(config_id, [1])
        arc_seq.append(config_id)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=config_id)
        sample_log_probs.append(log_prob)

        inputs = tf.nn.embedding_lookup(self.w_emb, config_id)
    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = arc_seq

    self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
    self.ppl = tf.exp(tf.reduce_sum(self.sample_log_probs) /
                      tf.to_float(self.num_layers * self.num_branches))
    self.all_h = all_h 
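A note on the log-probabilities above: tf.nn.sparse_softmax_cross_entropy_with_logits returns the negative log-probability of config_id under softmax(logits), so sample_log_probs accumulates negative log-likelihoods and self.ppl exponentiates their mean, i.e. the controller's perplexity over its own sampled architecture.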
Example #29
Source File: utils_tf.py    From incremental_detectors with BSD 3-Clause "New" or "Revised" License
def tf_random_sample(sz, *args):
    s = tf.reshape((tf.shape(args[0])[0]), (1, ))
    ar = tf.expand_dims(tf.log(tf.tile([10.], s)), 0)
    sample = tf.multinomial(ar, sz)[0]
    return tuple(tf.gather(a, sample) for a in args) 
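Because every entry of ar is the same constant (log 10), tf.multinomial here draws sz indices uniformly at random, with replacement, from [0, batch_size); tf.gather then selects the corresponding rows from each tensor in args, yielding a common random subsample.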
Example #30
Source File: categorical.py    From garage with MIT License
def sample_sym(self, dist_info, name='sample_sym'):
        with tf.name_scope(name):
            probs = dist_info['prob']
            samples = tf.multinomial(tf.math.log(probs + 1e-8),
                                     num_samples=1)[:, 0]

            return tf.nn.embedding_lookup(np.eye(self.dim, dtype=np.float32),
                                          samples)