Python tensorflow.RaggedTensor() Examples

The following are 23 code examples of tensorflow.RaggedTensor(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also want to check out all available functions and classes of the tensorflow module, or try the search function.
Example #1
Source File: export.py    From estimator with Apache License 2.0 6 votes vote down vote up
def _check_tensor(tensor, name, error_label='feature'):
  """Validates that `tensor` is a Tensor, SparseTensor or RaggedTensor.

  Instances of `tf.Tensor`, `tf.sparse.SparseTensor` and `tf.RaggedTensor` are
  accepted immediately. Any other object is accepted only when it exposes a
  `.tensor` attribute and `ops.convert_to_tensor` does not raise `TypeError`
  for it.

  Args:
    tensor: The value to validate.
    name: Optional name of the value; when truthy it is appended to
      `error_label` in the error message.
    error_label: Prefix describing the kind of value being checked.

  Raises:
    ValueError: If `tensor` is not one of the accepted kinds.
  """
  accepted_types = (tf.Tensor, tf.sparse.SparseTensor, tf.RaggedTensor)
  if isinstance(tensor, accepted_types):
    return

  name_suffix = ' {}'.format(name) if name else ''
  value_error = ValueError('{}{} must be a Tensor, SparseTensor, or '
                           'RaggedTensor.'.format(error_label, name_suffix))
  # NOTE(ericmc): The handling below is a specific carve-out for
  # LabeledTensor, which has a `.tensor` attribute and which is
  # convertible to tf.Tensor via ops.convert_to_tensor.
  # Allowing all types convertible to tf.Tensor is considered by soergel@
  # to be too permissive.
  # TODO(soergel): accept any type convertible to Tensor,
  # as in cl/193238295 snapshot #6.
  if not hasattr(tensor, 'tensor'):
    raise value_error
  try:
    ops.convert_to_tensor(tensor)
  except TypeError:
    raise value_error
Example #2
Source File: tokenizer.py    From OpenNMT-tf with MIT License 6 votes vote down vote up
def tokenize(self, text):
    """Splits text into tokens, running the tokenization on the CPU.

    Args:
      text: The input to tokenize: a string or a batch of strings, given
        either as a ``tf.Tensor`` or as Python values.

    Returns:
      The result type follows the type of :obj:`text`:

      - a Python string gives a list of Python strings;
      - a list of Python strings gives a list of list of Python strings;
      - a 0-D ``tf.Tensor`` gives a 1-D ``tf.Tensor``;
      - a 1-D ``tf.Tensor`` gives a 2-D ``tf.RaggedTensor``.

    Raises:
      ValueError: if the rank of :obj:`text` is greater than 1.
    """
    # All the work is delegated to _tokenize under an explicit CPU device scope.
    cpu_scope = tf.device("cpu:0")
    with cpu_scope:
      tokens = self._tokenize(text)
    return tokens
Example #3
Source File: tokenizer.py    From OpenNMT-tf with MIT License 6 votes vote down vote up
def detokenize(self, tokens, sequence_length=None):
    """Joins tokens back into text, running the detokenization on the CPU.

    Batches of tokens are supported by the Tensor version.

    Args:
      tokens: A sequence of tokens or a batch of such sequences, given as a
        ``tf.Tensor``, a ``tf.RaggedTensor``, or Python values.
      sequence_length: The length of each sequence. It must be provided when
        :obj:`tokens` is a dense 2-D ``tf.Tensor``.

    Returns:
      The result type follows the type of :obj:`tokens`:

      - a list of list of Python strings gives a list of Python strings;
      - a list of Python strings gives a Python string;
      - a N-D ``tf.Tensor`` (or ``tf.RaggedTensor``) gives a (N-1)-D
        ``tf.Tensor``.

    Raises:
      ValueError: if the rank of :obj:`tokens` is greater than 2.
      ValueError: if :obj:`tokens` is a 2-D dense ``tf.Tensor`` and
        :obj:`sequence_length` is not set.
    """
    # All the work is delegated to _detokenize under an explicit CPU device
    # scope.
    cpu_scope = tf.device("cpu:0")
    with cpu_scope:
      text = self._detokenize(tokens, sequence_length)
    return text
Example #4
Source File: tokenizer.py    From OpenNMT-tf with MIT License 6 votes vote down vote up
def _detokenize(self, tokens, sequence_length):
    """Dispatches detokenization on the type and rank of ``tokens``.

    Ragged tensors are checked first, then other tensors, then nested Python
    lists; anything else is treated as a flat sequence of tokens.

    Raises:
      ValueError: if a tensor input has a rank other than 1 or 2, or if a dense
        rank-2 tensor is given without ``sequence_length``.
    """
    if isinstance(tokens, tf.RaggedTensor):
      ragged_rank = len(tokens.shape)
      if ragged_rank == 1:
        return self._detokenize_tensor(tokens.values)
      if ragged_rank == 2:
        return self._detokenize_ragged_tensor(tokens)
      raise ValueError("Unsupported RaggedTensor rank %d for detokenization" % ragged_rank)

    if tf.is_tensor(tokens):
      dense_rank = len(tokens.shape)
      if dense_rank == 1:
        return self._detokenize_tensor(tokens)
      if dense_rank != 2:
        raise ValueError("Unsupported tensor rank %d for detokenization" % dense_rank)
      # A dense batch carries padding, so the true lengths must be supplied.
      if sequence_length is None:
        raise ValueError("sequence_length is required for Tensor detokenization")
      return self._detokenize_batch_tensor(tokens, sequence_length)

    # A non-empty list whose first item is itself a list is handled as a batch.
    is_batch = isinstance(tokens, list) and tokens and isinstance(tokens[0], list)
    if is_batch:
      return [self.detokenize(sequence) for sequence in tokens]

    text_tokens = [tf.compat.as_text(token) for token in tokens]
    return self._detokenize_string(text_tokens)
Example #5
Source File: tensor_adapter_test.py    From tfx-bsl with Apache License 2.0 6 votes vote down vote up
def testRaggedTensor(self, tensor_representation_textpb, record_batch,
                       expected_type_spec, expected_ragged_tensor):
    """Checks that a record batch is converted to the expected ragged tensor.

    The adapter is configured with a single output named "output" whose
    representation is parsed from `tensor_representation_textpb`.
    """
    representation = text_format.Parse(tensor_representation_textpb,
                                       schema_pb2.TensorRepresentation())
    adapter_config = tensor_adapter.TensorAdapterConfig(
        record_batch.schema, {"output": representation})
    adapter = tensor_adapter.TensorAdapter(adapter_config)

    batch_tensors = adapter.ToBatchTensors(record_batch)
    self.assertLen(batch_tensors, 1)
    self.assertIn("output", batch_tensors)

    output = batch_tensors["output"]
    accepted_output_types = (tf.RaggedTensor,
                             tf.compat.v1.ragged.RaggedTensorValue)
    self.assertIsInstance(output, accepted_output_types)
    # The type spec compatibility check is only performed in eager mode.
    if tf.executing_eagerly():
      mismatch_message = "{} is not compatible with spec {}".format(
          output, expected_type_spec)
      self.assertTrue(
          expected_type_spec.is_compatible_with(output), mismatch_message)

    self.assertRaggedAllEqual(output, expected_ragged_tensor)
    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
Example #6
Source File: saved_transform_io_test.py    From transform with Apache License 2.0 5 votes vote down vote up
def test_ragged_roundtrip(self):
    """Saves a transform taking a ragged input, re-applies it, checks output."""
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTenors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # First graph: halve a ragged float placeholder and save the transform.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        ragged_placeholder = tf.compat.v1.ragged.placeholder(
            tf.float32, ragged_rank=1, value_shape=[])
        halved = ragged_placeholder / 2.0
        saved_transform_io.write_saved_transform_from_session(
            session, {'input': ragged_placeholder}, {'output': halved},
            export_path)

    # Second graph: apply the saved transform to a freshly built ragged tensor.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        row_splits = np.array([0, 2, 3], dtype=np.int64)
        flat_values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
        ragged_input = tf.RaggedTensor.from_row_splits(flat_values, row_splits)

        # Feeding a computed input gives confidence that the graphs are fused.
        scaled_input = {'input': ragged_input * 10}
        _, transformed = (
            saved_transform_io.partially_apply_saved_transform_internal(
                export_path, scaled_input))
        ragged_output = transformed['output']
        self.assertIsInstance(ragged_output, tf.RaggedTensor)
        evaluated = session.run(ragged_output)

        # Row splits are unchanged; values are multiplied by 10, divided by 2.
        self.assertAllEqual(row_splits, evaluated.row_splits)
        self.assertEqual([5.0, 10.0, 20.0], evaluated.values.tolist())
Example #7
Source File: util.py    From model-analysis with Apache License 2.0 5 votes vote down vote up
def wrap_tensor_or_dict_of_tensors_in_identity(
    tensor_or_dict_of_tensors: types.TensorTypeMaybeDict
) -> types.TensorTypeMaybeDict:
  # pyformat: disable
  """Returns the input with every tensor passed through tf.identity.

  This is a workaround for TensorFlow issue #17568 (b/71769512).

  Args:
    tensor_or_dict_of_tensors: A single Tensor, or a dict whose values are
      Tensors (nested dicts are not supported).

  Returns:
    A Tensor wrapped with tf.identity, or a new dict with the same keys whose
    values are wrapped with tf.identity.

  Raises:
    ValueError: A value was neither a Tensor, a RaggedTensor nor a
      SparseTensor, so it could not be wrapped.
  """
  # pyformat: enable

  def _identity_of(tensor: types.TensorType) -> types.TensorType:
    if isinstance(tensor, (tf.Tensor, tf.RaggedTensor)):
      return tf.identity(tensor)
    if not isinstance(tensor, tf.SparseTensor):
      raise ValueError('could not wrap Tensor %s in identity' % str(tensor))
    # A SparseTensor is rebuilt from its three components, each one wrapped.
    return tf.SparseTensor(
        indices=tf.identity(tensor.indices),
        values=tf.identity(tensor.values),
        dense_shape=tf.identity(tensor.dense_shape))

  if not isinstance(tensor_or_dict_of_tensors, dict):
    return _identity_of(tensor_or_dict_of_tensors)

  # Dictionary values are expected to be Tensors, never dictionaries.
  return {
      key: _identity_of(value)
      for key, value in tensor_or_dict_of_tensors.items()
  }
Example #8
Source File: graph_tools.py    From transform with Apache License 2.0 5 votes vote down vote up
def get_dependent_inputs(graph, input_tensors, output_tensors):
  """Filters input_tensors down to those that output_tensors depend on.

  Args:
    graph: A `tf.Graph`. It may be the (intermediate) output tf graph of any
      transform phase, including phase 0 where no tensor replacement has
      happened yet.
    input_tensors: A dict of logical name to `tf.Tensor`, `tf.SparseTensor`, or
      `tf.RaggedTensor`. The logical name has no implication here and can be
      anything; in some cases it is the feature name of the input tensor.
    output_tensors: Either a dict of logical name to `tf.Tensor`,
      `tf.SparseTensor`, or `tf.RaggedTensor`, or a list of such tensors.

  Returns:
    A dict of logical name to `tf.Tensor`, `tf.SparseTensor`, or
    `tf.RaggedTensor` holding the entries of input_tensors that
    (transitively) produce output_tensors.
  """
  outputs = (
      output_tensors if isinstance(output_tensors, list)
      else six.itervalues(output_tensors))

  # This may run before all tensor replacements are ready. To satisfy the
  # precondition of InitializableGraphAnalyzer, every replacement is registered
  # with a placeholder readiness value; the readiness of replacement tensors
  # does not affect the correctness of dependency tracing.
  replacement_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
  sink_readiness = [(sink.tensor, False) for sink in replacement_sinks]
  analyzer = InitializableGraphAnalyzer(graph, input_tensors, sink_readiness)

  needed_inputs = {}
  for output in outputs:
    needed_inputs.update(analyzer.get_dependent_inputs(output))

  selected = {}
  for name, tensor in six.iteritems(input_tensors):
    if name in needed_inputs:
      selected[name] = tensor
  return selected
Example #9
Source File: graph_tools.py    From transform with Apache License 2.0 5 votes vote down vote up
def get_dependent_inputs(self, tensor_or_op):
    """Finds the inputs that `tensor_or_op` transitively depends on.

    Args:
      tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`.

    Returns:
      A dict of name to `Tensor`, `SparseTensor`, or `RaggedTensor` containing
      the entries of the input signature that `tensor_or_op` depends on.

    Raises:
      TypeError: If `tensor_or_op` is of an unsupported type.
    """
    supported_types = (
        tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
      raise TypeError(
          'Expected Tensor, SparseTensor, RaggedTensor or Operation got {} of '
          'type {}'.format(tensor_or_op, type(tensor_or_op)))

    # Gather the dependent sources of every component of the argument.
    dependent_sources = set()
    for part in _decompose_tensor_or_op(tensor_or_op):
      analysis = self._graph_analyzer.analyze_tensor(part)
      dependent_sources.update(analysis.dependent_sources)

    # An input is kept as soon as any one of its components is a dependency.
    return {
        name: tensor
        for name, tensor in six.iteritems(self._input_signature)
        if any(
            tf_utils.hashable_tensor_or_op(part) in dependent_sources
            for part in _decompose_tensor_or_op(tensor))
    }
Example #10
Source File: graph_tools.py    From transform with Apache License 2.0 5 votes vote down vote up
def _make_source_infos_dict(self, input_signature, replaced_tensors_ready):
    """Maps each source tensor of the graph to its _SourceInfo.

    Two kinds of sources are recorded. Tensors from replaced_tensors_ready
    carry a known readiness and no name. Tensors (or tensor components) from
    input_signature are marked ready and are named after their signature key.

    Args:
      input_signature: A dict whose keys are strings and values are `Tensor`s,
        `SparseTensor`s, or `RaggedTensor`s.
      replaced_tensors_ready: A dict from `Tensor`, `SparseTensor`s, or
        `RaggedTensor`s to a bool telling whether the tensor is ready in this
        phase.

    Returns:
      A dictionary from source tensors to _SourceInfos.

    Raises:
      TypeError: If a value of input_signature is neither a `Tensor` nor a
        `CompositeTensor`.
    """
    source_infos = {}
    for replaced, ready in six.iteritems(replaced_tensors_ready):
      dereferenced = tf_utils.deref_tensor_or_op(replaced)
      for part in _decompose_tensor_or_op(dereferenced):
        source_infos[tf_utils.hashable_tensor_or_op(part)] = _SourceInfo(
            ready, None)

    for name, tensor in six.iteritems(input_signature):
      if isinstance(tensor, tf.Tensor):
        _set_unique_value_in_dict(
            source_infos, tensor,
            _SourceInfo(True, '{}$tensor'.format(name)))
        continue
      if not isinstance(tensor, composite_tensor.CompositeTensor):
        raise TypeError(
            'Expected Tensor, or CompositeTensor, got {} of type {}'.format(
                tensor, type(tensor)))
      # Every component of a composite tensor gets its own indexed name.
      for idx, part in enumerate(_decompose_tensor_or_op(tensor)):
        _set_unique_value_in_dict(
            source_infos, part,
            _SourceInfo(True, '{}$composite_tensor_{}'.format(name, idx)))
    return source_infos
Example #11
Source File: graph_tools.py    From transform with Apache License 2.0 5 votes vote down vote up
def ready_to_run(self, tensor_or_op):
    """Tells whether the given tensor or op is ready to run.

    A tensor is ready to run when every tensor among its transitive
    dependencies is set to `True` in `known_ready`.

    Encountering a placeholder is an error, because all placeholders are
    assumed to be keys in `known_ready`. This avoids unexpected behavior when
    the user creates placeholders (as opposed to placeholders created by the
    tf.Transform framework).

    Encountering a Table op is likewise an error: a table should either be a
    key in `known_ready` (when analyzing the main session run) or should not be
    encountered at all (when analyzing the graph init run).

    Args:
      tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`

    Returns:
      A bool indicating whether the tensor is ready to run.

    Raises:
      TypeError: If `tensor_or_op` is of an unsupported type.
      ValueError: If a placeholder or table is encountered.
      _UnexpectedTableError: If an initializable table op is encountered.
      _UnexpectedPlaceholderError: If a placeholder is encountered.
    """
    supported_types = (
        tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
      raise TypeError(
          'Expected Tensor, SparseTensor, RaggedTensor, or Operation got {} of type {}'
          .format(tensor_or_op, type(tensor_or_op)))

    # Stop at the first component that is not ready.
    for part in _decompose_tensor_or_op(tensor_or_op):
      if not self.analyze_tensor(part).is_ready_to_run:
        return False
    return True
Example #12
Source File: saved_transform_io_v2_test.py    From transform with Apache License 2.0 5 votes vote down vote up
def test_ragged_roundtrip(self):
    """Saves a v1 transform with a ragged input and applies it through v2."""
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTenors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build and save a graph that halves a ragged float placeholder.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        ragged_placeholder = tf.compat.v1.ragged.placeholder(
            tf.float32, ragged_rank=1, value_shape=[])
        halved = ragged_placeholder / 2.0
        saved_transform_io.write_saved_transform_from_session(
            session, {'input': ragged_placeholder}, {'output': halved},
            export_path)

    row_splits = np.array([0, 2, 3], dtype=np.int64)
    flat_values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
    ragged_input = tf.RaggedTensor.from_row_splits(flat_values, row_splits)

    # Feeding a computed input gives confidence that the graphs are fused.
    scaled_input = {'input': ragged_input * 10}
    loader = saved_transform_io_v2.SavedModelLoader(export_path)
    transformed = loader.apply_v1_transform_model_in_v2(scaled_input)
    ragged_output = transformed['output']
    self.assertIsInstance(ragged_output, tf.RaggedTensor)

    # Row splits are unchanged; values are multiplied by 10 and divided by 2.
    self.assertAllEqual(row_splits, ragged_output.row_splits)
    self.assertEqual([5.0, 10.0, 20.0], ragged_output.values.numpy().tolist())
Example #13
Source File: text_inputter.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def add_sequence_controls(ids, length, start_id=None, end_id=None):
  """Adds sequence control tokens.

  Args:
    ids: Sequence of ids as 1D or 2D (batch) tensor.
    length: Sequence length as 0D or 1D (batch) tensor.
    start_id: Id to prepend to the sequence (set ``None`` to disable).
    end_id: Id to append to the sequence (set ``None`` to disable).

  Returns:
    A tuple ``(ids, length)`` where the requested control tokens have been
    inserted and ``length`` has been incremented once per inserted token.

  Raises:
    ValueError: if the static rank of :obj:`ids` is not 1 or 2.
  """
  rank = ids.shape.rank
  if rank not in (1, 2):
    # "%s" so that an unknown rank (None) also reports a ValueError instead of
    # failing on the string formatting itself.
    raise ValueError("Unsupported rank %s (expected 1 or 2)" % rank)
  batch_size = tf.shape(ids)[0] if rank == 2 else None

  def _make_column(value):
    # Builds a [1] vector (or a [batch_size, 1] column for batched input)
    # holding the control id, ready to be concatenated on the last axis.
    value = tf.constant(value, dtype=ids.dtype)
    if batch_size is not None:
      value = tf.fill([batch_size], value)
    return tf.expand_dims(value, -1)

  if start_id is not None:
    # Use the id requested by the caller, not a hard-coded constant.
    start_ids = _make_column(start_id)
    ids = tf.concat([start_ids, ids], axis=-1)
    # Rebind rather than update in place so the caller's object is untouched.
    length = length + 1

  if end_id is not None:
    end_ids = _make_column(end_id)
    if batch_size is not None:
      # Run concat on RaggedTensor to handle sequences with variable length.
      ids = tf.RaggedTensor.from_tensor(ids, lengths=length)
    ids = tf.concat([ids, end_ids], axis=-1)
    if batch_size is not None:
      ids = ids.to_tensor()
    length = length + 1

  return ids, length
Example #14
Source File: mappers.py    From transform with Apache License 2.0 5 votes vote down vote up
def word_count(tokens, name=None):
  """Computes the number of tokens in each document/row.

  `tokens` represents tokenized strings as either a `RaggedTensor` or a
  `SparseTensor`. Only the size of each row is returned, so the dtype is not
  actually constrained to string.

  Args:
    tokens: either
      (1) a two-dimensional `SparseTensor`, or
      (2) a `RaggedTensor` with ragged rank of 1, non-ragged rank of 1
      of dtype `tf.string` containing tokens to be counted
    name: (Optional) A name for this operation.

  Returns:
    A one-dimensional `Tensor` holding the token count of each row.

  Raises:
    ValueError: if tokens is neither sparse nor ragged
  """
  with tf.compat.v1.name_scope(name, 'word_count'):
    if isinstance(tokens, tf.RaggedTensor):
      return tokens.row_lengths()
    if not isinstance(tokens, tf.SparseTensor):
      raise ValueError('Invalid token tensor')

    # Replace every stored value by 1 so that a row-wise sum counts the tokens.
    ones = tf.ones_like(tokens.values, dtype=tf.int64)
    indicator = tf.SparseTensor(indices=tokens.indices,
                                values=ones,
                                dense_shape=tokens.dense_shape)
    counts = tf.sparse.reduce_sum(indicator, axis=1)
    counts.set_shape([tokens.shape[0]])
    return counts
Example #15
Source File: tokenizer_test.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def _testTokenizerOnBatchTensor(self, tokenizer, text, ref_tokens):
    """Checks that tokenizing a batch tensor gives the reference ragged tokens."""
    batch = tf.constant(text)
    tokens = tokenizer.tokenize(batch)
    self.assertIsInstance(tokens, tf.RaggedTensor)
    actual = tokens.to_list()
    # The reference tokens are compared as bytes.
    expected = tf.nest.map_structure(tf.compat.as_bytes, ref_tokens)
    self.assertAllEqual(actual, expected)
Example #16
Source File: text.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def tokens_to_words(tokens, subword_token="■", is_spacer=None):
  """Groups a sequence of subword tokens into a sequence of words.

  Example:

    >>> opennmt.data.tokens_to_words(["He@@", "llo", "W@@", "orld", "@@!"], subword_token="@@")
    <tf.RaggedTensor [[b'He@@', b'llo'], [b'W@@', b'orld', b'@@!']]>

  Args:
    tokens: A 1D string ``tf.Tensor``.
    subword_token: The special token used by the subword tokenizer.
    is_spacer: Whether :obj:`subword_token` is used as a spacer (as in
      SentencePiece) or a joiner (as in BPE). If ``None``, it is inferred
      directly from :obj:`subword_token`.

  Returns:
    The words as a 2D string ``tf.RaggedTensor``.
  """
  if is_spacer is None:
    is_spacer = subword_token == "▁"

  prefix_pattern = "%s.*" % subword_token
  if is_spacer:
    # A word starts on a token that begins with the spacer, on the token that
    # follows one ending with the spacer, and implicitly on the first token.
    starts_with_spacer = tf.strings.regex_full_match(tokens, prefix_pattern)
    is_first = tf.one_hot(
        0, tf.shape(tokens)[0], on_value=True, off_value=False)
    opens_word = tf.logical_or(starts_with_spacer, is_first)
    ends_with_spacer = tf.strings.regex_full_match(
        tokens, ".+%s" % subword_token)
    follows_spacer = tf.roll(ends_with_spacer, shift=1, axis=0)
    word_start = tf.logical_or(follows_spacer, opens_word)
  else:
    # A token continues the current word when it begins with the joiner or
    # when the previous token ends with it; every other token starts a word.
    ends_with_joiner = tf.strings.regex_full_match(
        tokens, ".*%s" % subword_token)
    starts_with_joiner = tf.strings.regex_full_match(tokens, prefix_pattern)
    follows_joiner = tf.roll(ends_with_joiner, shift=1, axis=0)
    continues_word = tf.logical_or(follows_joiner, starts_with_joiner)
    word_start = tf.logical_not(continues_word)

  row_starts = tf.squeeze(tf.where(word_start), -1)
  return tf.RaggedTensor.from_row_starts(tokens, row_starts)
Example #17
Source File: text.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def tokens_to_chars(tokens):
  """Breaks each token into its unicode characters.

  Example:

    >>> opennmt.data.tokens_to_chars(["hello", "world"])
    <tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'w', b'o', b'r', b'l', b'd']]>

  Args:
    tokens: A string ``tf.Tensor`` of shape :math:`[T]`.

  Returns:
    The characters as a 2D string ``tf.RaggedTensor``.
  """
  # The tokens are decoded as UTF-8 and split on character boundaries.
  chars = tf.strings.unicode_split(tokens, "UTF-8")
  return chars
Example #18
Source File: tokenizer.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def _detokenize_batch_tensor(self, tokens, sequence_length):
    """Detokenizes a dense batch of tokens through its ragged representation.

    The dense batch is converted to a ``tf.RaggedTensor`` using
    ``sequence_length`` as row lengths, then passed on to
    ``_detokenize_ragged_tensor``.
    """
    return self._detokenize_ragged_tensor(
        tf.RaggedTensor.from_tensor(tokens, lengths=sequence_length))
Example #19
Source File: tokenizer.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def _detokenize_ragged_tensor(self, tokens):
    """Detokenizes a batch of tokens given as a ``tf.RaggedTensor``.

    Unless overridden, this default implementation forwards the dense
    representation and the row lengths to _detokenize_batch_tensor.

    Args:
      tokens: A 2-D ``tf.RaggedTensor``.

    Returns:
      A 1-D string ``tf.Tensor``.
    """
    dense_tokens = tokens.to_tensor()
    lengths = tokens.row_lengths()
    return self._detokenize_batch_tensor(dense_tokens, lengths)
Example #20
Source File: text_inputter.py    From OpenNMT-tf with MIT License 5 votes vote down vote up
def make_features(self, element=None, features=None, training=None):
    """Tokenizes raw text into "tokens" and "length" features.

    If ``features`` already contains "tokens" it is returned untouched. If it
    contains "text", that entry is removed and used as the text to tokenize
    in place of ``element``. When training with a noiser configured, the
    noisy tokens either replace the clean ones at random (in-place noise) or
    are stored under "noisy_"-prefixed keys.
    """
    features = {} if features is None else features
    if "tokens" in features:
      return features
    if "text" in features:
      element = features.pop("text")

    tokens = self.tokenizer.tokenize(element)
    if isinstance(tokens, tf.RaggedTensor):
      length = tokens.row_lengths()
      tokens = tokens.to_tensor()
    else:
      length = tf.shape(tokens)[0]

    if training and self.noiser is not None:
      noisy_tokens, noisy_length = self.noiser(tokens, keep_shape=False)
      if not self.in_place_noise:
        # Call make_features again to fill the remaining noisy features.
        noisy_features = self.make_features(
            features=dict(tokens=noisy_tokens, length=noisy_length),
            training=training)
        for noisy_key, noisy_value in noisy_features.items():
          features["noisy_%s" % noisy_key] = noisy_value
      else:
        # Pick the noisy version with probability self.noise_probability.
        use_noise = tf.random.uniform([]) < self.noise_probability
        tokens, length = tf.cond(
            use_noise,
            true_fn=lambda: (noisy_tokens, noisy_length),
            false_fn=lambda: (tokens, length))

    features["length"] = length
    features["tokens"] = tokens
    return features
Example #21
Source File: schema_inference.py    From transform with Apache License 2.0 4 votes vote down vote up
def _feature_spec_from_batched_tensors(tensors):
  """Infer a feature spec from a dict of tensors.

  Args:
    tensors: A dict whose keys are strings and values are `Tensor`,
      `SparseTensor`, or `RaggedTensor`s.

  Returns:
    A feature spec inferred from the types and shapes of the tensors. A
    `SparseTensor` maps to a `VarLenFeature` and a `Tensor` to a
    `FixedLenFeature` whose shape excludes the batch dimension. A
    `RaggedTensor` is arbitrarily mapped to a `VarLenFeature`, with a warning.

  Raises:
    ValueError: If the feature spec cannot be inferred.
    TypeError: If any of the values of `tensors` are not a `Tensor`,
        `SparseTensor`, or `RaggedTensor`.
  """
  feature_spec = {}
  for name, tensor in six.iteritems(tensors):
    # Validate the type first: reading `.dtype` on an unsupported value would
    # otherwise fail with an AttributeError (or a misleading dtype error)
    # instead of the documented TypeError.
    if not isinstance(tensor, (tf.Tensor, tf.SparseTensor, tf.RaggedTensor)):
      raise TypeError(
          'Expected a Tensor or SparseTensor, got {} of type {} for feature {}'
          .format(tensor, type(tensor), name))
    if tensor.dtype not in (tf.string, tf.int64, tf.float32):
      raise ValueError('Feature {} ({}) had invalid dtype {} for feature spec'
                       .format(name, tensor, tensor.dtype))
    if isinstance(tensor, tf.SparseTensor):
      shape = tensor.get_shape()
      if shape.ndims != 2:
        raise ValueError(
            'Feature {} ({}) had invalid shape {} for VarLenFeature: must have '
            'rank 2'.format(name, tensor, shape))
      feature_spec[name] = tf.io.VarLenFeature(tensor.dtype)
    elif isinstance(tensor, tf.Tensor):
      shape = tensor.get_shape()
      if shape.ndims in [None, 0]:
        raise ValueError(
            'Feature {} ({}) had invalid shape {} for FixedLenFeature: must '
            'have rank at least 1'.format(name, tensor, shape))
      # The leading (batch) dimension is not part of the feature shape.
      feature_shape = shape.as_list()[1:]
      if any(dim is None for dim in feature_shape):
        raise ValueError(
            'Feature {} ({}) had invalid shape {} for FixedLenFeature: apart '
            'from the batch dimension, all dimensions must have known size'
            .format(name, tensor, shape))
      feature_spec[name] = tf.io.FixedLenFeature(feature_shape, tensor.dtype)
    else:
      # Only a RaggedTensor can reach this branch after the type check above.
      tf.compat.v1.logging.warn(
          'Feature %s was a RaggedTensor.  A Schema will be generated but the '
          'Schema cannot be used with a coder (e.g. to materialize output '
          'data) or to generated a feature spec.', name)
      # Arbitrarily select VarLenFeature.
      feature_spec[name] = tf.io.VarLenFeature(tensor.dtype)

  return feature_spec
Example #22
Source File: neighbor_features.py    From neural-structured-learning with Apache License 2.0 4 votes vote down vote up
def make_missing_neighbor_inputs(neighbor_config,
                                 inputs,
                                 weight_dtype=tf.float32):
  """Creates the neighbor inputs that are absent from `inputs`.

  Args:
    neighbor_config: An instance of `configs.GraphNeighborConfig` specifying the
      number of neighbors and how neighbor features should be named.
    inputs: Dictionary of input tensors that may be missing neighbor features.
      The keys are the feature names. See `utils.unpack_neighbor_features` for
      the expected names of neighbor features and weights.
    weight_dtype: `tf.Dtype` for neighbor weights. Defaults to `tf.float32`.

  Returns:
    A dictionary of neighbor feature and weight tensors that do not already
    exist in `inputs`. The keys are specified according to `neighbor_config`.
  """
  known_names = set(inputs)
  prefix = neighbor_config.prefix
  missing_inputs = {}
  for neighbor_index in range(neighbor_config.max_neighbors):
    # Weight of this potential neighbor.
    weight_name = '{}{}{}'.format(prefix, neighbor_index,
                                  neighbor_config.weight_suffix)
    if weight_name not in known_names:
      missing_inputs[weight_name] = tf.keras.Input(
          (1,), dtype=weight_dtype, name=weight_name)

    # Replicate every non-neighbor input that has no neighbor counterpart yet.
    for feature_name, tensor in inputs.items():
      if not feature_name.startswith(prefix):
        neighbor_feature_name = '{}{}_{}'.format(prefix, neighbor_index,
                                                 feature_name)
        if neighbor_feature_name not in known_names:
          missing_inputs[neighbor_feature_name] = tf.keras.Input(
              tensor.shape[1:],
              batch_size=tensor.shape[0],
              dtype=tensor.dtype,
              name=neighbor_feature_name,
              ragged=isinstance(tensor, tf.RaggedTensor),
              sparse=isinstance(tensor, tf.sparse.SparseTensor))
  return missing_inputs
Example #23
Source File: tensor_adapter_test.py    From tfx-bsl with Apache License 2.0 4 votes vote down vote up
def _MakeRaggedTensorDTypesTestCases():
  """Builds one ragged-tensor test case per value type and list type.

  Each case is a dict of keyword arguments: a test case name, the tensor
  representation text proto, an input record batch with a single
  "ragged_feature" column, and the expected ragged tensor and type spec.

  Returns:
    A list of test case dicts.
  """
  tensor_representation_textpb = """
  ragged_tensor {
    feature_path {
      step: "ragged_feature"
    }
  }
  """
  list_type_factories = (("list", pa.list_), ("large_list", pa.large_list))
  test_cases = []
  for value_type in _ALL_SUPPORTED_VALUE_TYPES:
    for list_type_name, make_list_type in list_type_factories:
      expected_type_spec = tf.RaggedTensorSpec(
          [None, None],
          _ARROW_TYPE_TO_TF_TYPE[value_type],
          ragged_rank=1,
          row_splits_dtype=tf.int64)

      # The input rows include a null and an empty list; both are expected to
      # show up as empty rows in row_splits below.
      if pa.types.is_integer(value_type):
        values, expected_values = [[1, 2], None, [], [3]], [1, 2, 3]
      elif pa.types.is_floating(value_type):
        values = [[1.0, 2.0], None, [], [3.0]]
        expected_values = [1.0, 2.0, 3.0]
      else:
        values = [[b"a", b"b"], None, [], [b"c"]]
        expected_values = [b"a", b"b", b"c"]
      row_splits = np.asarray([0, 2, 2, 2, 3], dtype=np.int64)

      if tf.executing_eagerly():
        flat_values = tf.constant(
            expected_values, dtype=_ARROW_TYPE_TO_TF_TYPE[value_type])
        expected_output = tf.RaggedTensor.from_row_splits(
            values=flat_values, row_splits=row_splits)
      else:
        flat_values = np.array(
            expected_values, _ARROW_TYPE_TO_NP_TYPE[value_type])
        expected_output = tf.compat.v1.ragged.RaggedTensorValue(
            values=flat_values, row_splits=row_splits)

      column = pa.array(values, type=make_list_type(value_type))
      test_cases.append({
          "testcase_name": "1D_{}_{}".format(value_type, list_type_name),
          "tensor_representation_textpb": tensor_representation_textpb,
          "record_batch": pa.RecordBatch.from_arrays(
              [column], ["ragged_feature"]),
          "expected_ragged_tensor": expected_output,
          "expected_type_spec": expected_type_spec,
      })

  return test_cases