Python tensorflow.python.ops.embedding_ops.embedding_lookup() Examples
The following are 30 code examples of tensorflow.python.ops.embedding_ops.embedding_lookup(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.embedding_ops, or try the search function.
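Before the examples, here is a minimal sketch of the basic call (not taken from any of the projects below), assuming TensorFlow 1.x graph mode as used throughout this page: embedding_lookup gathers rows of a params tensor by integer id.

import tensorflow as tf
from tensorflow.python.ops import embedding_ops

params = tf.constant([[0.0, 0.1],
                      [1.0, 1.1],
                      [2.0, 2.1]])  # a 3-row embedding table
ids = tf.constant([2, 0, 2])
embedded = embedding_ops.embedding_lookup(params, ids)  # rows 2, 0, 2 of params

with tf.Session() as sess:
    print(sess.run(embedded))  # [[2.  2.1], [0.  0.1], [2.  2.1]]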
Example #1
Source File: tf_seq2seq_model.py From Conditional-SeqGAN-Tensorflow with MIT License
def _argmax_or_mcsearch(embedding, output_projection=None,
                        update_embedding=True, mc_search=False):
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(prev, output_projection[0],
                              output_projection[1])

    if isinstance(mc_search, bool):
      prev_symbol = (tf.reshape(tf.multinomial(prev, 1), [-1])
                     if mc_search else math_ops.argmax(prev, 1))
    else:
      prev_symbol = tf.cond(
          mc_search,
          lambda: tf.reshape(tf.multinomial(prev, 1), [-1]),
          lambda: tf.argmax(prev, 1))

    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
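A hedged usage sketch: loop functions with this (prev, step) signature are consumed by the legacy seq2seq decoders, which call them at each step to turn the previous output logits into the next input embedding. All names below (embedding, W, b, decoder_inputs, initial_state, cell) are hypothetical placeholders, not identifiers from the project above.

loop_fn = _argmax_or_mcsearch(embedding, output_projection=(W, b),
                              update_embedding=False, mc_search=False)
outputs, state = tf.contrib.legacy_seq2seq.rnn_decoder(
    decoder_inputs, initial_state, cell, loop_function=loop_fn)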
Example #2
Source File: gmm_ops.py From auto-alt-text-lambda-api with MIT License
def _init_clusters_random(data, num_clusters, random_seed):
  """Does random initialization of clusters.

  Args:
    data: a list of Tensors with a matrix of data, each row is an example.
    num_clusters: an integer with the number of clusters.
    random_seed: Seed for PRNG used to initialize seeds.

  Returns:
    A Tensor with num_clusters random rows of data.
  """
  assert isinstance(data, list)
  num_data = math_ops.add_n([array_ops.shape(inp)[0] for inp in data])
  with ops.control_dependencies(
      [check_ops.assert_less_equal(num_clusters, num_data)]):
    indices = random_ops.random_uniform(
        [num_clusters],
        minval=0,
        maxval=math_ops.cast(num_data, dtypes.int64),
        seed=random_seed,
        dtype=dtypes.int64)
    indices = math_ops.cast(indices, dtypes.int32) % num_data
    clusters_init = embedding_lookup(data, indices, partition_strategy='div')
    return clusters_init
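Since `data` here is a list of Tensors, a note on the 'div' strategy the lookup requests: embedding_lookup treats a list-valued `params` as one table split into shards, and 'div' maps ids to shards in contiguous blocks (whereas 'mod' would round-robin them). A minimal sketch with made-up two-row shards:

# Two shards of a 4-row table; under 'div', ids 0-1 live in shard_a, 2-3 in shard_b.
shard_a = tf.constant([[0.0], [1.0]])
shard_b = tf.constant([[2.0], [3.0]])
rows = embedding_ops.embedding_lookup(
    [shard_a, shard_b], tf.constant([0, 3]),
    partition_strategy='div')  # -> [[0.0], [3.0]]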
Example #3
Source File: core_rnn_cell_impl.py From auto-alt-text-lambda-api with MIT License
def __call__(self, inputs, state, scope=None):
  """Run the cell on embedded inputs."""
  with vs.variable_scope(scope or "embedding_wrapper"):  # "EmbeddingWrapper"
    with ops.device("/cpu:0"):
      if self._initializer:
        initializer = self._initializer
      elif vs.get_variable_scope().initializer:
        initializer = vs.get_variable_scope().initializer
      else:
        # Default initializer for embeddings should have variance=1.
        sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
      if type(state) is tuple:
        data_type = state[0].dtype
      else:
        data_type = state.dtype
      embedding = vs.get_variable(
          "embedding", [self._embedding_classes, self._embedding_size],
          initializer=initializer, dtype=data_type)
      embedded = embedding_ops.embedding_lookup(
          embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
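This `__call__` belongs to an EmbeddingWrapper-style RNN cell. A hedged construction sketch, assuming the TF 1.x contrib API this implementation mirrors; `vocab_size` and `emb_dim` are placeholders:

inner_cell = tf.contrib.rnn.GRUCell(128)
cell = tf.contrib.rnn.EmbeddingWrapper(inner_cell,
                                       embedding_classes=vocab_size,
                                       embedding_size=emb_dim)
# `cell` now accepts integer token ids and embeds them before running the GRU.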
Example #4
Source File: mod_core_rnn_cell_impl.py From RGAN with MIT License
def __call__(self, inputs, state, scope=None):
  """Run the cell on embedded inputs."""
  with _checked_scope(self, scope or "embedding_wrapper", reuse=self._reuse):
    with ops.device("/cpu:0"):
      if self._initializer:
        initializer = self._initializer
      elif vs.get_variable_scope().initializer:
        initializer = vs.get_variable_scope().initializer
      else:
        # Default initializer for embeddings should have variance=1.
        sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
      if type(state) is tuple:
        data_type = state[0].dtype
      else:
        data_type = state.dtype
      embedding = vs.get_variable(
          "embedding", [self._embedding_classes, self._embedding_size],
          initializer=initializer, dtype=data_type)
      embedded = embedding_ops.embedding_lookup(
          embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #5
Source File: clustering_ops.py From auto-alt-text-lambda-api with MIT License
def _init_clusters_random(self):
  """Does random initialization of clusters.

  Returns:
    Tensor of randomly initialized clusters.
  """
  num_data = math_ops.add_n([array_ops.shape(inp)[0] for inp in self._inputs])
  # Note that for mini-batch k-means, we should ensure that the batch size of
  # data used during initialization is sufficiently large to avoid duplicated
  # clusters.
  with ops.control_dependencies(
      [check_ops.assert_less_equal(self._num_clusters, num_data)]):
    indices = random_ops.random_uniform(
        array_ops.reshape(self._num_clusters, [-1]),
        minval=0,
        maxval=math_ops.cast(num_data, dtypes.int64),
        seed=self._random_seed,
        dtype=dtypes.int64)
    clusters_init = embedding_lookup(
        self._inputs, indices, partition_strategy='div')
    return clusters_init
Example #6
Source File: gmm_ops.py From lambda-packs with MIT License
def _init_clusters_random(data, num_clusters, random_seed):
  """Does random initialization of clusters.

  Args:
    data: a list of Tensors with a matrix of data, each row is an example.
    num_clusters: an integer with the number of clusters.
    random_seed: Seed for PRNG used to initialize seeds.

  Returns:
    A Tensor with num_clusters random rows of data.
  """
  assert isinstance(data, list)
  num_data = math_ops.add_n([array_ops.shape(inp)[0] for inp in data])
  with ops.control_dependencies(
      [check_ops.assert_less_equal(num_clusters, num_data)]):
    indices = random_ops.random_uniform(
        [num_clusters],
        minval=0,
        maxval=math_ops.cast(num_data, dtypes.int64),
        seed=random_seed,
        dtype=dtypes.int64)
    indices %= math_ops.cast(num_data, dtypes.int64)
    clusters_init = embedding_lookup(data, indices, partition_strategy='div')
    return clusters_init
Example #7
Source File: clustering_ops.py From lambda-packs with MIT License
def _init_clusters_random(self):
  """Does random initialization of clusters.

  Returns:
    Tensor of randomly initialized clusters.
  """
  num_data = math_ops.add_n([array_ops.shape(inp)[0] for inp in self._inputs])
  # Note that for mini-batch k-means, we should ensure that the batch size of
  # data used during initialization is sufficiently large to avoid duplicated
  # clusters.
  with ops.control_dependencies(
      [check_ops.assert_less_equal(self._num_clusters, num_data)]):
    indices = random_ops.random_uniform(
        array_ops.reshape(self._num_clusters, [-1]),
        minval=0,
        maxval=math_ops.cast(num_data, dtypes.int64),
        seed=self._random_seed,
        dtype=dtypes.int64)
    clusters_init = embedding_lookup(
        self._inputs, indices, partition_strategy='div')
    return clusters_init
Example #8
Source File: core_rnn_cell.py From lambda-packs with MIT License
def call(self, inputs, state):
  """Run the cell on embedded inputs."""
  with ops.device("/cpu:0"):
    if self._initializer:
      initializer = self._initializer
    elif vs.get_variable_scope().initializer:
      initializer = vs.get_variable_scope().initializer
    else:
      # Default initializer for embeddings should have variance=1.
      sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
      initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)

    if isinstance(state, tuple):
      data_type = state[0].dtype
    else:
      data_type = state.dtype

    embedding = vs.get_variable(
        "embedding", [self._embedding_classes, self._embedding_size],
        initializer=initializer, dtype=data_type)
    embedded = embedding_ops.embedding_lookup(
        embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #9
Source File: my_seq2seq.py From Neural_Conversation_Models with Apache License 2.0
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
  """Get a loop_function that extracts the previous symbol and embeds it.

  Args:
    embedding: embedding tensor for symbols.
    output_projection: None or a pair (W, B). If provided, each fed previous
      output will first be multiplied by W and added B.
    update_embedding: Boolean; if False, the gradients will not propagate
      through the embeddings.

  Returns:
    A loop function.
  """
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(
          prev, output_projection[0], output_projection[1])
    prev_symbol = math_ops.argmax(prev, 1)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
Example #10
Source File: qa_model.py From cs224n-win18-squad with Apache License 2.0
def add_embedding_layer(self, emb_matrix):
    """
    Adds word embedding layer to the graph.

    Inputs:
      emb_matrix: shape (400002, embedding_size).
        The GloVe vectors, plus vectors for PAD and UNK.
    """
    with vs.variable_scope("embeddings"):

        # Note: the embedding matrix is a tf.constant which means it's not a
        # trainable parameter
        embedding_matrix = tf.constant(
            emb_matrix, dtype=tf.float32,
            name="emb_matrix")  # shape (400002, embedding_size)

        # Get the word embeddings for the context and question,
        # using the placeholders self.context_ids and self.qn_ids
        self.context_embs = embedding_ops.embedding_lookup(
            embedding_matrix,
            self.context_ids)  # shape (batch_size, context_len, embedding_size)
        self.qn_embs = embedding_ops.embedding_lookup(
            embedding_matrix,
            self.qn_ids)  # shape (batch_size, question_len, embedding_size)
Example #11
Source File: seq2seq_helper.py From demo2program with MIT License
def __init__(self, dsl_syntax, max_program_len, embedding, start_tokens,
             end_token, seed=None):
    """Initializer.

    Args:
      dsl_syntax: Syntax checker for generating next possible tokens.
      max_program_len: maximum program length
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`. The returned tensor
        will be passed to the decoder input.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.
      seed: The sampling seed.

    Raises:
      ValueError: if `start_tokens` is not a 1D tensor or `end_token` is not
        a scalar.
    """
    super(SyntacticSampleEmbeddingHelper, self).__init__(
        dsl_syntax, max_program_len, embedding, start_tokens, end_token)
    self._seed = seed
Example #12
Source File: RankLSTM_model.py From Deep-Listwise-Context-Model-for-Ranking-Refinement with Apache License 2.0
def embedding_rnn_decoder(self, initial_state, cell, attention_states,
                          encode_embed, num_heads=1, output_projection=None,
                          feed_previous=False,
                          update_embedding_for_previous=True, scope=None):
  """RNN decoder with embedding and a pure-decoding option."""
  if output_projection is not None:
    proj_weights = ops.convert_to_tensor(output_projection[0],
                                         dtype=dtypes.float32)
    proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
    proj_biases = ops.convert_to_tensor(
        output_projection[1], dtype=dtypes.float32)
    proj_biases.get_shape().assert_is_compatible_with([num_symbols])

  with variable_scope.variable_scope(scope or "embedding_rnn_decoder"):
    loop_function = self._extract_argmax_and_embed(
        encode_embed, output_projection,
        update_embedding_for_previous) if feed_previous else None
    # emb_inp = (
    #     embedding_ops.embedding_lookup(embeddings, i) for i in decoder_inputs)
    # emb_inp = decoder_embed
    return self.rnn_decoder(encode_embed, attention_states, initial_state,
                            cell, num_heads=num_heads,
                            loop_function=loop_function)
Example #13
Source File: rnn_cell.py From ROLO with Apache License 2.0
def __call__(self, inputs, state, scope=None):
  """Run the cell on embedded inputs."""
  with vs.variable_scope(scope or type(self).__name__):  # "EmbeddingWrapper"
    with ops.device("/cpu:0"):
      if self._initializer:
        initializer = self._initializer
      elif vs.get_variable_scope().initializer:
        initializer = vs.get_variable_scope().initializer
      else:
        # Default initializer for embeddings should have variance=1.
        sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
      if type(state) is tuple:
        data_type = state[0].dtype
      else:
        data_type = state.dtype
      embedding = vs.get_variable(
          "embedding", [self._embedding_classes, self._embedding_size],
          initializer=initializer, dtype=data_type)
      embedded = embedding_ops.embedding_lookup(
          embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #14
Source File: core_rnn_cell.py From Multiview2Novelview with MIT License
def call(self, inputs, state):
  """Run the cell on embedded inputs."""
  with ops.device("/cpu:0"):
    if self._initializer:
      initializer = self._initializer
    elif vs.get_variable_scope().initializer:
      initializer = vs.get_variable_scope().initializer
    else:
      # Default initializer for embeddings should have variance=1.
      sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
      initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)

    if isinstance(state, tuple):
      data_type = state[0].dtype
    else:
      data_type = state.dtype

    embedding = vs.get_variable(
        "embedding", [self._embedding_classes, self._embedding_size],
        initializer=initializer, dtype=data_type)
    embedded = embedding_ops.embedding_lookup(
        embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #15
Source File: rnn_cell.py From ecm with Apache License 2.0
def __call__(self, inputs, state, scope=None):
  """Run the cell on embedded inputs."""
  with vs.variable_scope(scope or type(self).__name__):  # "EmbeddingWrapper"
    with ops.device("/cpu:0"):
      if self._initializer:
        initializer = self._initializer
      elif vs.get_variable_scope().initializer:
        initializer = vs.get_variable_scope().initializer
      else:
        # Default initializer for embeddings should have variance=1.
        sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
      if type(state) is tuple:
        data_type = state[0].dtype
      else:
        data_type = state.dtype
      embedding = vs.get_variable(
          "embedding", [self._embedding_classes, self._embedding_size],
          initializer=initializer, dtype=data_type)
      embedded = embedding_ops.embedding_lookup(
          embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #16
Source File: helper.py From OpenSeq2Seq with Apache License 2.0
def __init__(self, embedding, start_tokens, end_token,
             softmax_temperature=None, seed=None):
  """Initializer.

  Args:
    embedding: A callable that takes a vector tensor of `ids` (argmax ids),
      or the `params` argument for `embedding_lookup`. The returned tensor
      will be passed to the decoder input.
    start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
    end_token: `int32` scalar, the token that marks end of decoding.
    softmax_temperature: (Optional) `float32` scalar, value to divide the
      logits by before computing the softmax. Larger values (above 1.0)
      result in more random samples, while smaller values push the sampling
      distribution towards the argmax. Must be strictly greater than 0.
      Defaults to 1.0.
    seed: (Optional) The sampling seed.

  Raises:
    ValueError: if `start_tokens` is not a 1D tensor or `end_token` is not
      a scalar.
  """
  super(SampleEmbeddingHelper, self).__init__(
      embedding, start_tokens, end_token)
  self._softmax_temperature = softmax_temperature
  self._seed = seed
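A hedged usage sketch for this helper, assuming the tf.contrib.seq2seq decoding API it plugs into; `embedding_matrix`, `decoder_cell`, `encoder_state`, `batch_size`, `GO_ID`, and `EOS_ID` are placeholders:

helper = SampleEmbeddingHelper(
    embedding=embedding_matrix,
    start_tokens=tf.fill([batch_size], GO_ID),
    end_token=EOS_ID,
    softmax_temperature=0.7)  # below 1.0 pushes samples towards the argmax
decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state)
outputs, final_state, lengths = tf.contrib.seq2seq.dynamic_decode(decoder)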
Example #17
Source File: tf_seq2seq_model.py From Conditional-SeqGAN-Tensorflow with MIT License
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
  """Get a loop_function that extracts the previous symbol and embeds it.

  Args:
    embedding: embedding tensor for symbols.
    output_projection: None or a pair (W, B). If provided, each fed previous
      output will first be multiplied by W and added B.
    update_embedding: Boolean; if False, the gradients will not propagate
      through the embeddings.

  Returns:
    A loop function.
  """
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(
          prev, output_projection[0], output_projection[1])
    prev_symbol = math_ops.argmax(prev, 1)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
Example #18
Source File: embedding.py From NJUNMT-tf with Apache License 2.0
def _add_emb_signal(self, x, time):
    """ add position embedding

    :param x:
    :param time:
    :return:
    """
    x_ndims = x.get_shape().ndims
    if x_ndims == 2:
        position = ops.convert_to_tensor(time, dtype=dtypes.int32)
    elif x_ndims == 3:
        position = math_ops.range(array_ops.shape(x)[1])
    else:
        raise ValueError("need a Tensor with rank 2 or 3")
    position_emb = embedding_ops.embedding_lookup(
        self._position_embedding, position)
    return x + array_ops.expand_dims(position_emb, 0)
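A hedged shape walk-through of the two branches, with illustrative sizes (batch 4, length 10, embedding dim 8); `pos_table` stands in for `self._position_embedding`:

pos_table = tf.get_variable("pos_emb", [100, 8])  # [max_positions, dim]

# Rank 2 (single decoding step): x is [4, 8]; the lookup at the scalar `time`
# yields [8], and expand_dims turns it into [1, 8], broadcast over the batch.
# Rank 3 (whole sequence): x is [4, 10, 8]; the lookup at range(10) yields
# [10, 8], and expand_dims turns it into [1, 10, 8], broadcast over the batch.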
Example #19
Source File: clustering_ops.py From deep_image_model with Apache License 2.0
def _init_clusters_random(self):
  """Does random initialization of clusters.

  Returns:
    Tensor of randomly initialized clusters.
  """
  num_data = tf.add_n([tf.shape(inp)[0] for inp in self._inputs])
  # Note that for mini-batch k-means, we should ensure that the batch size of
  # data used during initialization is sufficiently large to avoid duplicated
  # clusters.
  with tf.control_dependencies(
      [tf.assert_less_equal(self._num_clusters, num_data)]):
    indices = tf.random_uniform(tf.reshape(self._num_clusters, [-1]),
                                minval=0,
                                maxval=tf.cast(num_data, tf.int64),
                                seed=self._random_seed,
                                dtype=tf.int64)
    clusters_init = embedding_lookup(self._inputs, indices,
                                     partition_strategy='div')
    return clusters_init
Example #20
Source File: rnn_cell.py From deep_image_model with Apache License 2.0
def __call__(self, inputs, state, scope=None):
  """Run the cell on embedded inputs."""
  with vs.variable_scope(scope or type(self).__name__):  # "EmbeddingWrapper"
    with ops.device("/cpu:0"):
      if self._initializer:
        initializer = self._initializer
      elif vs.get_variable_scope().initializer:
        initializer = vs.get_variable_scope().initializer
      else:
        # Default initializer for embeddings should have variance=1.
        sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
        initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
      if type(state) is tuple:
        data_type = state[0].dtype
      else:
        data_type = state.dtype
      embedding = vs.get_variable(
          "embedding", [self._embedding_classes, self._embedding_size],
          initializer=initializer, dtype=data_type)
      embedded = embedding_ops.embedding_lookup(
          embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #21
Source File: control_flow_ops_test.py From deep_image_model with Apache License 2.0
def testIndexedSlicesGradient(self):
  with ops.Graph().as_default():
    embedding_matrix = tf.get_variable(
        "embedding_matrix", [5, 5],
        initializer=tf.random_normal_initializer())

    def Cond(it, _):
      return it < 5

    def Body(it, cost):
      embedding = embedding_ops.embedding_lookup(embedding_matrix + 0.0, [0])
      cost += tf.reduce_sum(embedding)
      return it + 1, cost

    _, cost = control_flow_ops.while_loop(
        Cond, Body, [tf.constant(0), tf.constant(0.0)])
    optimizer = momentum.MomentumOptimizer(0.1, 0.9)
    train_op = optimizer.minimize(cost)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      for _ in range(10):
        sess.run([train_op])
Example #22
Source File: RankLSTM_model.py From Deep-Listwise-Context-Model-for-Ranking-Refinement with Apache License 2.0
def listMLE(self, output, target_indexs, target_rels, name=None):
  loss = None
  with ops.name_scope(name, "listMLE",
                      [output] + target_indexs + target_rels):
    output = tf.nn.l2_normalize(output, 1)
    loss = -1.0 * math_ops.reduce_sum(output, 1)
    print(loss.get_shape())
    exp_output = tf.exp(output)
    exp_output_table = tf.reshape(exp_output, [-1])
    print(exp_output.get_shape())
    print(exp_output_table.get_shape())
    sum_exp_output = math_ops.reduce_sum(exp_output, 1)
    loss = tf.add(loss, tf.log(sum_exp_output))
    # compute MLE
    for i in xrange(self.rank_list_size - 1):
      idx = target_indexs[i] + tf.to_int64(self.batch_index_bias)
      y_i = embedding_ops.embedding_lookup(exp_output_table, idx)
      # y_i = tf.gather_nd(exp_output, idx)
      sum_exp_output = tf.subtract(sum_exp_output, y_i)
      loss = tf.add(loss, tf.log(sum_exp_output))
    batch_size = tf.shape(target_rels[0])[0]
    return math_ops.reduce_sum(loss) / math_ops.cast(batch_size,
                                                     dtypes.float32)
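The graph above implements the ListMLE objective: after L2-normalizing the scores, the loss of one list is the negative log-likelihood of the target ordering under a Plackett-Luce model. A hedged plain-NumPy reference for a single list of scores ordered from most to least relevant (illustrative only; it ignores the batching and index bookkeeping above):

import numpy as np

def list_mle(scores):
    # -log P(order) = sum_i [ log(sum_{j >= i} exp(s_j)) - s_i ]
    exp_s = np.exp(scores)
    return sum(np.log(exp_s[i:].sum()) - scores[i]
               for i in range(len(scores)))

print(list_mle(np.array([2.0, 1.0, 0.5])))  # small loss: scores match the order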
Example #23
Source File: helper.py From reaction_prediction_seq2seq with Apache License 2.0
def __init__(self, inputs, sequence_length, embedding, sampling_probability,
             time_major=False, seed=None, scheduling_seed=None, name=None):
  """Initializer.

  Args:
    inputs: A (structure of) input tensors.
    sequence_length: An int32 vector tensor.
    embedding: A callable that takes a vector tensor of `ids` (argmax ids),
      or the `params` argument for `embedding_lookup`.
    sampling_probability: A 0D `float32` tensor: the probability of sampling
      categorically from the output ids instead of reading directly from the
      inputs.
    time_major: Python bool. Whether the tensors in `inputs` are time major.
      If `False` (default), they are assumed to be batch major.
    seed: The sampling seed.
    scheduling_seed: The schedule decision rule sampling seed.
    name: Name scope for any created operations.

  Raises:
    ValueError: if `sampling_probability` is not a scalar or vector.
  """
  with ops.name_scope(name, "ScheduledEmbeddingSamplingWrapper",
                      [embedding, sampling_probability]):
    if callable(embedding):
      self._embedding_fn = embedding
    else:
      self._embedding_fn = (
          lambda ids: embedding_ops.embedding_lookup(embedding, ids))
    self._sampling_probability = ops.convert_to_tensor(
        sampling_probability, name="sampling_probability")
    if self._sampling_probability.get_shape().ndims not in (0, 1):
      raise ValueError(
          "sampling_probability must be either a scalar or a vector. "
          "saw shape: %s" % (self._sampling_probability.get_shape()))
    self._seed = seed
    self._scheduling_seed = scheduling_seed
    super(ScheduledEmbeddingTrainingHelper, self).__init__(
        inputs=inputs,
        sequence_length=sequence_length,
        time_major=time_major,
        name=name)
Example #24
Source File: seq2seq.py From DeepAffinity with GNU General Public License v3.0
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
  """Get a loop_function that extracts the previous symbol and embeds it.

  Args:
    embedding: embedding tensor for symbols.
    output_projection: None or a pair (W, B). If provided, each fed previous
      output will first be multiplied by W and added B.
    update_embedding: Boolean; if False, the gradients will not propagate
      through the embeddings.

  Returns:
    A loop function.
  """
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(prev, output_projection[0],
                              output_projection[1])
    prev_symbol = math_ops.argmax(prev, 1)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
Example #25
Source File: seq2seq.py From DeepAffinity with GNU General Public License v3.0
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
  """Get a loop_function that extracts the previous symbol and embeds it.

  Args:
    embedding: embedding tensor for symbols.
    output_projection: None or a pair (W, B). If provided, each fed previous
      output will first be multiplied by W and added B.
    update_embedding: Boolean; if False, the gradients will not propagate
      through the embeddings.

  Returns:
    A loop function.
  """
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(prev, output_projection[0],
                              output_projection[1])
    prev_symbol = math_ops.argmax(prev, 1)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
Example #26
Source File: helper.py From OpenSeq2Seq with Apache License 2.0
def __init__(self, embedding, start_tokens, end_token,
             positional_embedding=None):
  """Initializer.

  Args:
    embedding: A callable that takes a vector tensor of `ids` (argmax ids),
      or the `params` argument for `embedding_lookup`. The returned tensor
      will be passed to the decoder input.
    start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
    end_token: `int32` scalar, the token that marks end of decoding.

  Raises:
    ValueError: if `start_tokens` is not a 1D tensor or `end_token` is not
      a scalar.
  """
  if callable(embedding):
    self._embedding_fn = embedding
  else:
    self._embedding_fn = (
        lambda ids: embedding_ops.embedding_lookup(embedding, ids))

  self._use_pos_embedding = False
  if positional_embedding is not None:
    if callable(positional_embedding):
      self._pos_embedding_fn = positional_embedding
    else:
      self._pos_embedding_fn = (
          lambda ids: embedding_ops.embedding_lookup(positional_embedding,
                                                     ids))
    self._use_pos_embedding = True

  self._start_tokens = ops.convert_to_tensor(
      start_tokens, dtype=dtypes.int32, name="start_tokens")
  self._end_token = ops.convert_to_tensor(
      end_token, dtype=dtypes.int32, name="end_token")
  if self._start_tokens.get_shape().ndims != 1:
    raise ValueError("start_tokens must be a vector")
  self._batch_size = array_ops.size(start_tokens)
  if self._end_token.get_shape().ndims != 0:
    raise ValueError("end_token must be a scalar")
  self._start_inputs = self._embedding_fn(self._start_tokens)
  if self._use_pos_embedding:
    # change dtype for mixed precision
    self._start_inputs += self._pos_embedding_fn(ops.convert_to_tensor(0))
Example #27
Source File: seq2seq.py From DeepAffinity with GNU General Public License v3.0
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
  """Get a loop_function that extracts the previous symbol and embeds it.

  Args:
    embedding: embedding tensor for symbols.
    output_projection: None or a pair (W, B). If provided, each fed previous
      output will first be multiplied by W and added B.
    update_embedding: Boolean; if False, the gradients will not propagate
      through the embeddings.

  Returns:
    A loop function.
  """
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(prev, output_projection[0],
                              output_projection[1])
    prev_symbol = math_ops.argmax(prev, 1)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
Example #28
Source File: embedding.py From NJUNMT-tf with Apache License 2.0
def embed_words(self, words, time=0):
    """ embed the word

    :param words: 1/2-dim tensor, the first dimension indicates batch_size
    :param time: indicating the position
    :return: embeddings: [batch_size, length, dim_emb]
    """
    emb = embedding_ops.embedding_lookup(self._embeddings, words)
    return self._add_timing_signal(emb, time)
Example #29
Source File: seq2seq.py From DeepAffinity with GNU General Public License v3.0
def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
  """Get a loop_function that extracts the previous symbol and embeds it.

  Args:
    embedding: embedding tensor for symbols.
    output_projection: None or a pair (W, B). If provided, each fed previous
      output will first be multiplied by W and added B.
    update_embedding: Boolean; if False, the gradients will not propagate
      through the embeddings.

  Returns:
    A loop function.
  """
  def loop_function(prev, _):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(prev, output_projection[0],
                              output_projection[1])
    prev_symbol = math_ops.argmax(prev, 1)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
  return loop_function
Example #30
Source File: seq2seq_helper.py From demo2program with MIT License
def __init__(self, dsl_syntax, max_program_len, embedding, start_tokens,
             end_token):
    """Initializer.

    Args:
      dsl_syntax: Syntax checker for generating next possible tokens.
      max_program_len: maximum program length
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`. The returned tensor
        will be passed to the decoder input.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.

    Raises:
      ValueError: if `start_tokens` is not a 1D tensor or `end_token` is not
        a scalar.
    """
    self.dsl_syntax = dsl_syntax
    self.max_program_len = max_program_len
    self.previous_tokens = []
    self.previous_probs = []
    self.previous_masks = []
    if callable(embedding):
        self._embedding_fn = embedding
    else:
        self._embedding_fn = (
            lambda ids: embedding_ops.embedding_lookup(embedding, ids))
    self._start_tokens = ops.convert_to_tensor(
        start_tokens, dtype=dtypes.int32, name="start_tokens")
    self._end_token = ops.convert_to_tensor(
        end_token, dtype=dtypes.int32, name="end_token")
    if self._start_tokens.get_shape().ndims != 1:
        raise ValueError("start_tokens must be a vector")
    self._batch_size = array_ops.size(start_tokens)
    if self._end_token.get_shape().ndims != 0:
        raise ValueError("end_token must be a scalar")
    self._start_inputs = self._embedding_fn(self._start_tokens)