Python Examples of tensorflow.RaggedTensor

Source File: export.py From estimator with Apache License 2.0

6 votes

def _check_tensor(tensor, name, error_label='feature'):
  """Validate that `tensor` is a Tensor, SparseTensor or RaggedTensor.

  As a carve-out, an object exposing a `.tensor` attribute is also accepted
  provided `ops.convert_to_tensor` does not raise a `TypeError` for it.

  Args:
    tensor: The object to validate.
    name: Name of the object; appended to the error message when truthy.
    error_label: Leading label of the error message.

  Raises:
    ValueError: If `tensor` is not one of the accepted types.
  """
  accepted_types = (tf.Tensor, tf.sparse.SparseTensor, tf.RaggedTensor)
  if isinstance(tensor, accepted_types):
    return

  name_suffix = ' {}'.format(name) if name else ''
  error = ValueError('{}{} must be a Tensor, SparseTensor, or '
                     'RaggedTensor.'.format(error_label, name_suffix))

  # NOTE(ericmc): The handling below is a specific carve-out for
  # LabeledTensor, which has a `.tensor` attribute and which is convertible to
  # tf.Tensor via ops.convert_to_tensor. Allowing all types convertible to
  # tf.Tensor is considered by soergel@ to be too permissive.
  # TODO(soergel): accept any type convertible to Tensor,
  # as in cl/193238295 snapshot #6.
  if not hasattr(tensor, 'tensor'):
    raise error
  try:
    ops.convert_to_tensor(tensor)
  except TypeError:
    raise error

Source File: tokenizer.py From OpenNMT-tf with MIT License

6 votes

def tokenize(self, text):
    """Splits text into tokens.

    The work is delegated to ``self._tokenize`` inside a
    ``tf.device("cpu:0")`` scope.

    Args:
      text: A string or batch of strings to tokenize as a ``tf.Tensor`` or
        Python values.

    Returns:
      - If :obj:`text` is a Python string, a list of Python strings.
      - If :obj:`text` is a list of Python strings, a list of list of Python
        strings.
      - If :obj:`text` is a 0-D ``tf.Tensor``, a 1-D ``tf.Tensor``.
      - If :obj:`text` is a 1-D ``tf.Tensor``, a 2-D ``tf.RaggedTensor``.

    Raises:
      ValueError: if the rank of :obj:`text` is greater than 1.
    """
    with tf.device("cpu:0"):
      tokens = self._tokenize(text)
    return tokens

Source File: tokenizer.py From OpenNMT-tf with MIT License

6 votes

def detokenize(self, tokens, sequence_length=None):
    """Joins tokens back into text.

    The Tensor version supports batches of tokens. The work is delegated to
    ``self._detokenize`` inside a ``tf.device("cpu:0")`` scope.

    Args:
      tokens: Tokens or batch of tokens as a ``tf.Tensor``, ``tf.RaggedTensor``,
        or Python values.
      sequence_length: The length of each sequence. Required if :obj:`tokens`
        is a dense 2-D ``tf.Tensor``.

    Returns:
      - If :obj:`tokens` is a list of list of Python strings, a list of Python
        strings.
      - If :obj:`tokens` is a list of Python strings, a Python string.
      - If :obj:`tokens` is a N-D ``tf.Tensor`` (or ``tf.RaggedTensor``), a
        (N-1)-D ``tf.Tensor``.

    Raises:
      ValueError: if the rank of :obj:`tokens` is greater than 2.
      ValueError: if :obj:`tokens` is a 2-D dense ``tf.Tensor`` and
        :obj:`sequence_length` is not set.
    """
    with tf.device("cpu:0"):
      text = self._detokenize(tokens, sequence_length)
    return text

Source File: tokenizer.py From OpenNMT-tf with MIT License

6 votes

def _detokenize(self, tokens, sequence_length):
    """Dispatches detokenization on the type and rank of ``tokens``.

    Ragged tensors, dense tensors, nested Python lists and flat Python
    sequences are each routed to the matching ``_detokenize_*`` helper.

    Raises:
      ValueError: if a tensor input has a rank other than 1 or 2, or if a
        dense 2-D tensor is given without ``sequence_length``.
    """
    if isinstance(tokens, tf.RaggedTensor):
      ragged_rank = len(tokens.shape)
      if ragged_rank == 1:
        return self._detokenize_tensor(tokens.values)
      if ragged_rank == 2:
        return self._detokenize_ragged_tensor(tokens)
      raise ValueError(
          "Unsupported RaggedTensor rank %d for detokenization" % ragged_rank)

    if tf.is_tensor(tokens):
      dense_rank = len(tokens.shape)
      if dense_rank == 1:
        return self._detokenize_tensor(tokens)
      if dense_rank == 2:
        if sequence_length is None:
          raise ValueError("sequence_length is required for Tensor detokenization")
        return self._detokenize_batch_tensor(tokens, sequence_length)
      raise ValueError("Unsupported tensor rank %d for detokenization" % dense_rank)

    # A non-empty list whose first element is a list is treated as a batch.
    is_batch = isinstance(tokens, list) and tokens and isinstance(tokens[0], list)
    if is_batch:
      return [self.detokenize(sequence) for sequence in tokens]

    text_tokens = [tf.compat.as_text(token) for token in tokens]
    return self._detokenize_string(text_tokens)

Source File: tensor_adapter_test.py From tfx-bsl with Apache License 2.0

6 votes

def testRaggedTensor(self, tensor_representation_textpb, record_batch,
                       expected_type_spec, expected_ragged_tensor):
    """Checks that a TensorAdapter turns `record_batch` into the expected ragged output."""
    representation = text_format.Parse(tensor_representation_textpb,
                                       schema_pb2.TensorRepresentation())
    config = tensor_adapter.TensorAdapterConfig(record_batch.schema,
                                                {"output": representation})
    adapter = tensor_adapter.TensorAdapter(config)

    batch_tensors = adapter.ToBatchTensors(record_batch)
    self.assertLen(batch_tensors, 1)
    self.assertIn("output", batch_tensors)

    output = batch_tensors["output"]
    accepted_types = (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)
    self.assertIsInstance(output, accepted_types)

    # The type spec compatibility check only runs in eager mode.
    if tf.executing_eagerly():
      compatible = expected_type_spec.is_compatible_with(output)
      message = "{} is not compatible with spec {}".format(output,
                                                           expected_type_spec)
      self.assertTrue(compatible, message)

    self.assertRaggedAllEqual(output, expected_ragged_tensor)
    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)

Source File: saved_transform_io_test.py From transform with Apache License 2.0

5 votes

def test_ragged_roundtrip(self):
    """Writes a saved transform with a ragged input and applies it again.

    The exported graph halves a ragged float placeholder. The second graph
    feeds it a computed ragged tensor (the input multiplied by 10) and checks
    the row splits and values of the result.
    """
    # Skip when TensorInfo has no CompositeTensor attribute.
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTenors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # First graph: build `input / 2.0` and write it to `export_path`.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        input_float = tf.compat.v1.ragged.placeholder(tf.float32, ragged_rank=1,
                                                      value_shape=[])
        output = input_float / 2.0
        inputs = {'input': input_float}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    # Second graph: apply the saved transform to a new ragged tensor.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        splits = np.array([0, 2, 3], dtype=np.int64)
        values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
        input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

        # Using a computed input gives confidence that the graphs are fused
        inputs = {'input': input_ragged * 10}
        _, outputs = (
            saved_transform_io.partially_apply_saved_transform_internal(
                export_path, inputs))
        output_ragged = outputs['output']
        self.assertIsInstance(output_ragged, tf.RaggedTensor)
        result = session.run(output_ragged)

        # Row splits unchanged; values multiplied by 10 and divided by 2.
        self.assertAllEqual(splits, result.row_splits)
        self.assertEqual([5.0, 10.0, 20.0], result.values.tolist())

Source File: util.py From model-analysis with Apache License 2.0

5 votes

def wrap_tensor_or_dict_of_tensors_in_identity(
    tensor_or_dict_of_tensors: types.TensorTypeMaybeDict
) -> types.TensorTypeMaybeDict:
  # pyformat: disable
  """Pass a Tensor, or every value of a dict of Tensors, through tf.identity.

  Workaround for TensorFlow issue #17568 (b/71769512).

  Args:
    tensor_or_dict_of_tensors: Tensor or dict of Tensors to wrap around.

  Returns:
    Tensor or dict of Tensors wrapped with tf.identity.

  Raises:
    ValueError: We could not wrap the given Tensor / dict of Tensors in
      tf.identity.
  """
  # pyformat: enable

  def _identity(tensor: types.TensorType) -> types.TensorType:
    """Wraps one dense, ragged or sparse tensor in tf.identity."""
    if isinstance(tensor, (tf.Tensor, tf.RaggedTensor)):
      return tf.identity(tensor)
    if isinstance(tensor, tf.SparseTensor):
      # A SparseTensor is rebuilt from its individually wrapped components.
      return tf.SparseTensor(
          indices=tf.identity(tensor.indices),
          values=tf.identity(tensor.values),
          dense_shape=tf.identity(tensor.dense_shape))
    raise ValueError('could not wrap Tensor %s in identity' % str(tensor))

  if not isinstance(tensor_or_dict_of_tensors, dict):
    return _identity(tensor_or_dict_of_tensors)

  # Dictionary elements should only be Tensors (and not dictionaries).
  return {
      key: _identity(value)
      for key, value in tensor_or_dict_of_tensors.items()
  }

Source File: graph_tools.py From transform with Apache License 2.0

5 votes

def get_dependent_inputs(graph, input_tensors, output_tensors):
  """Filters input_tensors down to those that (transitively) feed output_tensors.

  Args:
    graph: A `tf.Graph`. It could be the (intermediate) output tf graph in any
      transform phase (including phase 0 where no tensor replacement has yet
      happened).
    input_tensors: A dict of logical name to `tf.Tensor`, `tf.SparseTensor`, or
      `tf.RaggedTensor`. Logical name doesn't have any implications in this
      method and can be anything. In some cases it is the feature name
      corresponding to the input tensor.
    output_tensors: A dict of logical name to `tf.Tensor`, `tf.SparseTensor`, or
      `tf.RaggedTensor`, or a list of `tf.Tensor`, `tf.SparseTensor`, or
      `tf.RaggedTensor`.

  Returns:
    A dict of logical name to `tf.Tensor`, `tf.SparseTensor`, or
    `tf.RaggedTensor` that are filtered from input_tensors (transitively)
    producing output_tensors
  """
  outputs = (
      output_tensors if isinstance(output_tensors, list)
      else six.itervalues(output_tensors))

  # Since this method may be called before all tensor replacements are ready, to
  # fulfill the precondition of InitializableGraphAnalyzer, we fake the
  # readiness of tensor replacements. Note that the readiness of replacement
  # tensors doesn't affect the correctness of dependencies tracing.
  replacement_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
  faked_readiness = [(sink.tensor, False) for sink in replacement_sinks]
  analyzer = InitializableGraphAnalyzer(graph, input_tensors, faked_readiness)

  # Collect the inputs that any of the outputs depends on.
  dependencies = {}
  for output in outputs:
    dependencies.update(analyzer.get_dependent_inputs(output))

  filtered_inputs = {}
  for name, tensor in six.iteritems(input_tensors):
    if name in dependencies:
      filtered_inputs[name] = tensor
  return filtered_inputs

Source File: graph_tools.py From transform with Apache License 2.0

5 votes

def get_dependent_inputs(self, tensor_or_op):
    """Finds the signature inputs that `tensor_or_op` transitively depends on.

    Args:
      tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`.

    Returns:
      A dict of name to `Tensor`, `SparseTensor`, or `RaggedTensor` (sub-dict of
      `input_signature`) that the given `tensor_or_op` depends on.

    Raises:
      TypeError: If `tensor_or_op` is of an unsupported type.
    """
    supported_types = (
        tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
      raise TypeError(
          'Expected Tensor, SparseTensor, RaggedTensor or Operation got {} of '
          'type {}'.format(tensor_or_op, type(tensor_or_op)))

    # Union of the dependent sources of every component of `tensor_or_op`.
    dependent_sources = set()
    for part in _decompose_tensor_or_op(tensor_or_op):
      analysis = self._graph_analyzer.analyze_tensor(part)
      dependent_sources.update(analysis.dependent_sources)

    def _is_dependency(candidate):
      """Returns True if any component of `candidate` is a dependent source."""
      return any(
          tf_utils.hashable_tensor_or_op(part) in dependent_sources
          for part in _decompose_tensor_or_op(candidate))

    return {
        name: tensor
        for name, tensor in six.iteritems(self._input_signature)
        if _is_dependency(tensor)
    }

Source File: graph_tools.py From transform with Apache License 2.0

5 votes

def _make_source_infos_dict(self, input_signature, replaced_tensors_ready):
    """Creates the mapping from source tensors to their _SourceInfos.

    This dictionary stores information about the sources of the graph.
    Each tensor in replaced_tensors_ready is a source whose readiness is known
    and has no name.  Each tensor (or component of a tensor) in input_signature
    is ready to run and has a name determined by the signature.

    Args:
      input_signature: A dict whose keys are strings and values are `Tensor`s,
        `SparseTensor`s, or `RaggedTensor`s.
      replaced_tensors_ready: a dict from `Tensor`, `SparseTensor`s, or
        `RaggedTensor`s to bool indicating whether the tensor is ready in this
        phase.

    Returns:
      a dictionary from source tensors to _SourceInfos.

    Raises:
      TypeError: If a value of `input_signature` is neither a `Tensor` nor a
        `CompositeTensor`.
    """
    source_infos = {}

    # Replaced tensors: readiness comes from the caller, the name is None.
    for replaced, ready in six.iteritems(replaced_tensors_ready):
      dereferenced = tf_utils.deref_tensor_or_op(replaced)
      for part in _decompose_tensor_or_op(dereferenced):
        info = _SourceInfo(ready, None)
        source_infos[tf_utils.hashable_tensor_or_op(part)] = info

    # Signature inputs: always ready, named after their signature key.
    for name, tensor in six.iteritems(input_signature):
      if isinstance(tensor, tf.Tensor):
        _set_unique_value_in_dict(source_infos, tensor,
                                  _SourceInfo(True, '{}$tensor'.format(name)))
        continue
      if not isinstance(tensor, composite_tensor.CompositeTensor):
        raise TypeError(
            'Expected Tensor, or CompositeTensor, got {} of type {}'.format(
                tensor, type(tensor)))
      for index, part in enumerate(_decompose_tensor_or_op(tensor)):
        _set_unique_value_in_dict(
            source_infos, part,
            _SourceInfo(True, '{}$composite_tensor_{}'.format(name, index)))
    return source_infos

Source File: graph_tools.py From transform with Apache License 2.0

5 votes

def ready_to_run(self, tensor_or_op):
    """Reports whether a given tensor or op is ready to run.

    A tensor is ready to run if every tensor in all its transitive dependencies
    are set to `True` in `known_ready`.

    Note that if a placeholder is encountered, this will result in an error as
    it is assumed that all placeholders are keys in `known_ready`.  This is
    to avoid unexpected behavior when the user creates placeholders (as opposed
    to placeholders created by the tf.Transform framework).

    Similarly encountering a Table op is an error because a table should be
    a key in `known_ready` (in the case of analyzing the main session run) or
    should not be encountered (in the case of analyzing the graph init run).

    Args:
      tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`

    Returns:
      A bool indicating whether the tensor is ready to run.

    Raises:
      TypeError: If `tensor_or_op` is of an unsupported type.
      ValueError: If a placeholder or table is encountered.
      _UnexpectedTableError: If an initializable table op is encountered.
      _UnexpectedPlaceholderError: If a placeholder is encountered.
    """
    supported_types = (
        tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
      raise TypeError(
          'Expected Tensor, SparseTensor, RaggedTensor, or Operation got {} of type {}'
          .format(tensor_or_op, type(tensor_or_op)))

    # Every component must be ready; stop at the first one that is not.
    for component in _decompose_tensor_or_op(tensor_or_op):
      if not self.analyze_tensor(component).is_ready_to_run:
        return False
    return True

Source File: saved_transform_io_v2_test.py From transform with Apache License 2.0

5 votes

def test_ragged_roundtrip(self):
    """Writes a saved transform with a ragged input and applies it via the V2 loader.

    The exported graph halves a ragged float placeholder. The saved model is
    then applied to a computed ragged tensor (the input multiplied by 10) and
    the row splits and values of the result are checked.
    """
    # Skip when TensorInfo has no CompositeTensor attribute.
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTenors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build `input / 2.0` in a V1 graph and write it to `export_path`.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        input_float = tf.compat.v1.ragged.placeholder(tf.float32, ragged_rank=1,
                                                      value_shape=[])
        output = input_float / 2.0
        inputs = {'input': input_float}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    splits = np.array([0, 2, 3], dtype=np.int64)
    values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
    input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

    # Using a computed input gives confidence that the graphs are fused
    inputs = {'input': input_ragged * 10}
    saved_model_loader = saved_transform_io_v2.SavedModelLoader(export_path)
    outputs = saved_model_loader.apply_v1_transform_model_in_v2(inputs)
    result = outputs['output']
    self.assertIsInstance(result, tf.RaggedTensor)

    # Row splits unchanged; values multiplied by 10 and divided by 2.
    self.assertAllEqual(splits, result.row_splits)
    self.assertEqual([5.0, 10.0, 20.0], result.values.numpy().tolist())

Source File: text_inputter.py From OpenNMT-tf with MIT License

5 votes

def add_sequence_controls(ids, length, start_id=None, end_id=None):
  """Adds sequence control tokens.

  ``start_id`` is prepended and ``end_id`` is appended to each sequence, and
  the returned length is incremented once for each id that is added.

  Args:
    ids: Sequence of ids as 1D or 2D (batch) tensor.
    length: Sequence length as 0D or 1D (batch) tensor.
    start_id: Id to prepend to the sequence (set ``None`` to disable).
    end_id: Id to append to the sequence (set ``None`` to disable).

  Returns:
    A tuple ``(ids, length)``.

  Raises:
    ValueError: if the rank of ``ids`` is not 1 or 2.
  """
  rank = ids.shape.rank
  if rank not in (1, 2):
    # "%s" keeps the message identical for integer ranks while still raising
    # ValueError (not a formatting TypeError) when the rank is None.
    raise ValueError("Unsupported rank %s (expected 1 or 2)" % rank)
  batch_size = tf.shape(ids)[0] if rank == 2 else None

  def _make_column(value):
    """Builds a [1] (or [batch_size, 1] for batches) tensor filled with value."""
    value = tf.constant(value, dtype=ids.dtype)
    if batch_size is not None:
      value = tf.fill([batch_size], value)
    return tf.expand_dims(value, -1)

  if start_id is not None:
    # Use the id requested by the caller, not a hard-coded constant.
    start_ids = _make_column(start_id)
    ids = tf.concat([start_ids, ids], axis=-1)
    # Rebind instead of "+=" so a caller-owned array is never mutated in place.
    length = length + 1

  if end_id is not None:
    end_ids = _make_column(end_id)
    if batch_size is not None:
      # Run concat on RaggedTensor to handle sequences with variable length.
      ids = tf.RaggedTensor.from_tensor(ids, lengths=length)
    ids = tf.concat([ids, end_ids], axis=-1)
    if batch_size is not None:
      ids = ids.to_tensor()
    length = length + 1

  return ids, length

Source File: mappers.py From transform with Apache License 2.0

5 votes

def word_count(tokens, name=None):
  """Compute the number of tokens in each document/row.

  `tokens` is either a `RaggedTensor` or `SparseTensor`, representing tokenized
  strings. This function simply returns size of each row, so the dtype is not
  constrained to string.

  Args:
    tokens: either
      (1) a two-dimensional `SparseTensor`, or
      (2) a `RaggedTensor` with ragged rank of 1, non-ragged rank of 1
      of dtype `tf.string` containing tokens to be counted
    name: (Optional) A name for this operation.

  Returns:
    A one-dimensional `Tensor` the token counts of each row.

  Raises:
    ValueError: if tokens is neither sparse nor ragged
  """
  with tf.compat.v1.name_scope(name, 'word_count'):
    if isinstance(tokens, tf.RaggedTensor):
      return tokens.row_lengths()
    if not isinstance(tokens, tf.SparseTensor):
      raise ValueError('Invalid token tensor')

    # Replace every stored value with 1 so that a row-wise sum counts entries.
    ones = tf.SparseTensor(
        indices=tokens.indices,
        values=tf.ones_like(tokens.values, dtype=tf.int64),
        dense_shape=tokens.dense_shape)
    counts = tf.sparse.reduce_sum(ones, axis=1)
    counts.set_shape([tokens.shape[0]])
    return counts

Source File: tokenizer_test.py From OpenNMT-tf with MIT License

5 votes

def _testTokenizerOnBatchTensor(self, tokenizer, text, ref_tokens):
    """Checks that tokenizing a batch tensor yields the expected ragged tokens."""
    batch = tf.constant(text)
    tokens = tokenizer.tokenize(batch)
    self.assertIsInstance(tokens, tf.RaggedTensor)
    actual = tokens.to_list()
    # Reference tokens are converted to bytes before the comparison.
    expected = tf.nest.map_structure(tf.compat.as_bytes, ref_tokens)
    self.assertAllEqual(actual, expected)

Source File: text.py From OpenNMT-tf with MIT License

5 votes

def tokens_to_words(tokens, subword_token="￭", is_spacer=None):
  """Groups a sequence of tokens into a sequence of words.

  Example:

    >>> opennmt.data.tokens_to_words(["He@@", "llo", "W@@", "orld", "@@!"], subword_token="@@")
    <tf.RaggedTensor [[b'He@@', b'llo'], [b'W@@', b'orld', b'@@!']]>

  Args:
    tokens: A 1D string ``tf.Tensor``.
    subword_token: The special token used by the subword tokenizer.
    is_spacer: Whether :obj:`subword_token` is used as a spacer (as in
      SentencePiece) or a joiner (as in BPE). If ``None``, will infer
      directly from :obj:`subword_token`.

  Returns:
    The words as a 2D string ``tf.RaggedTensor``.
  """
  if is_spacer is None:
    is_spacer = subword_token == "▁"

  if is_spacer:
    # A word starts on a token with a leading spacer, on the first token
    # (which implicitly starts with a spacer), or right after a token that
    # ends with a spacer.
    leading_spacer = tf.strings.regex_full_match(
        tokens, "%s.*" % subword_token)
    first_token = tf.one_hot(
        0, tf.shape(tokens)[0], on_value=True, off_value=False)
    opens_word = tf.logical_or(leading_spacer, first_token)
    trailing_spacer = tf.strings.regex_full_match(
        tokens, ".+%s" % subword_token)
    after_trailing_spacer = tf.roll(trailing_spacer, shift=1, axis=0)
    word_start = tf.logical_or(after_trailing_spacer, opens_word)
  else:
    # A token continues a word when it has a leading joiner or when the
    # previous token ends with a joiner; every other token starts a word.
    trailing_joiner = tf.strings.regex_full_match(
        tokens, ".*%s" % subword_token)
    leading_joiner = tf.strings.regex_full_match(
        tokens, "%s.*" % subword_token)
    after_trailing_joiner = tf.roll(trailing_joiner, shift=1, axis=0)
    continues_word = tf.logical_or(after_trailing_joiner, leading_joiner)
    word_start = tf.logical_not(continues_word)

  start_positions = tf.where(word_start)
  row_starts = tf.squeeze(start_positions, -1)
  return tf.RaggedTensor.from_row_starts(tokens, row_starts)

Source File: text.py From OpenNMT-tf with MIT License

5 votes

def tokens_to_chars(tokens):
  """Breaks each token into its unicode characters.

  Example:

    >>> opennmt.data.tokens_to_chars(["hello", "world"])
    <tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'w', b'o', b'r', b'l', b'd']]>

  Args:
    tokens: A string ``tf.Tensor`` of shape :math:`[T]`.

  Returns:
    The characters as a 2D string ``tf.RaggedTensor``.
  """
  characters = tf.strings.unicode_split(tokens, "UTF-8")
  return characters

Source File: tokenizer.py From OpenNMT-tf with MIT License

5 votes

def _detokenize_batch_tensor(self, tokens, sequence_length):
    """Detokenizes a dense batch by converting it to a ``tf.RaggedTensor`` first."""
    return self._detokenize_ragged_tensor(
        tf.RaggedTensor.from_tensor(tokens, lengths=sequence_length))

Source File: tokenizer.py From OpenNMT-tf with MIT License

5 votes

def _detokenize_ragged_tensor(self, tokens):
    """Detokenizes a batch of tokens given as a ``tf.RaggedTensor``.

    When not overridden, this default implementation calls
    _detokenize_batch_tensor on the dense representation.

    Args:
      tokens: A 2-D ``tf.RaggedTensor``.

    Returns:
      A 1-D string ``tf.Tensor``.
    """
    dense_tokens = tokens.to_tensor()
    lengths = tokens.row_lengths()
    return self._detokenize_batch_tensor(dense_tokens, lengths)

Source File: text_inputter.py From OpenNMT-tf with MIT License

5 votes

def make_features(self, element=None, features=None, training=None):
    """Tokenizes raw text into the ``tokens`` and ``length`` features.

    If ``features`` already contains ``"tokens"`` it is returned untouched.
    Otherwise the text (``features["text"]`` when present, else ``element``)
    is tokenized, and noisy variants are produced when ``training`` is set and
    a noiser is configured.
    """
    features = {} if features is None else features
    if "tokens" in features:
      return features
    if "text" in features:
      element = features.pop("text")

    tokenized = self.tokenizer.tokenize(element)
    if isinstance(tokenized, tf.RaggedTensor):
      length = tokenized.row_lengths()
      tokens = tokenized.to_tensor()
    else:
      length = tf.shape(tokenized)[0]
      tokens = tokenized

    if training and self.noiser is not None:
      noisy_tokens, noisy_length = self.noiser(tokens, keep_shape=False)
      if self.in_place_noise:
        # Randomly pick between the noisy and the clean version.
        clean_tokens, clean_length = tokens, length
        tokens, length = tf.cond(
            tf.random.uniform([]) < self.noise_probability,
            true_fn=lambda: (noisy_tokens, noisy_length),
            false_fn=lambda: (clean_tokens, clean_length))
      else:
        # Call make_features again to fill the remaining noisy features.
        noisy_features = self.make_features(
            features=dict(tokens=noisy_tokens, length=noisy_length),
            training=training)
        features.update(
            {"noisy_%s" % key: value for key, value in noisy_features.items()})

    features["length"] = length
    features["tokens"] = tokens
    return features

Source File: schema_inference.py From transform with Apache License 2.0

4 votes

def _feature_spec_from_batched_tensors(tensors):
  """Infer a feature spec from a dict of tensors.

  Args:
    tensors: A dict whose keys are strings and values are `Tensor`,
      `SparseTensor`, or `RaggedTensor`s.

  Returns:
    A feature spec inferred from the types and shapes of the tensors. A
    `SparseTensor` maps to a `VarLenFeature` and a `Tensor` maps to a
    `FixedLenFeature` whose shape drops the batch dimension. A `RaggedTensor`
    is arbitrarily mapped to a `VarLenFeature` and a warning is logged.

  Raises:
    ValueError: If the feature spec cannot be inferred.
    TypeError: If any of the values of `tensors` are not a `Tensor`,
        `SparseTensor`, or `RaggedTensor`.
  """
  feature_spec = {}
  for name, tensor in six.iteritems(tensors):
    if tensor.dtype not in (tf.string, tf.int64, tf.float32):
      raise ValueError('Feature {} ({}) had invalid dtype {} for feature spec'
                       .format(name, tensor, tensor.dtype))
    if isinstance(tensor, tf.SparseTensor):
      shape = tensor.get_shape()
      if shape.ndims != 2:
        raise ValueError(
            'Feature {} ({}) had invalid shape {} for VarLenFeature: must have '
            'rank 2'.format(name, tensor, shape))
      feature_spec[name] = tf.io.VarLenFeature(tensor.dtype)
    elif isinstance(tensor, tf.Tensor):
      shape = tensor.get_shape()
      if shape.ndims in [None, 0]:
        raise ValueError(
            'Feature {} ({}) had invalid shape {} for FixedLenFeature: must '
            'have rank at least 1'.format(name, tensor, shape))
      # Everything after the leading batch dimension must be statically known.
      non_batch_dims = shape.as_list()[1:]
      if any(dim is None for dim in non_batch_dims):
        raise ValueError(
            'Feature {} ({}) had invalid shape {} for FixedLenFeature: apart '
            'from the batch dimension, all dimensions must have known size'
            .format(name, tensor, shape))
      feature_spec[name] = tf.io.FixedLenFeature(non_batch_dims, tensor.dtype)
    elif isinstance(tensor, tf.RaggedTensor):
      tf.compat.v1.logging.warn(
          'Feature %s was a RaggedTensor.  A Schema will be generated but the '
          'Schema cannot be used with a coder (e.g. to materialize output '
          'data) or to generated a feature spec.', name)
      # Arbitrarily select VarLenFeature.
      feature_spec[name] = tf.io.VarLenFeature(tensor.dtype)
    else:
      raise TypeError(
          'Expected a Tensor or SparseTensor, got {} of type {} for feature {}'
          .format(tensor, type(tensor), name))

  return feature_spec

Source File: neighbor_features.py From neural-structured-learning with Apache License 2.0

4 votes

def make_missing_neighbor_inputs(neighbor_config,
                                 inputs,
                                 weight_dtype=tf.float32):
  """Creates the neighbor feature and weight inputs that `inputs` lacks.

  Args:
    neighbor_config: An instance of `configs.GraphNeighborConfig` specifying the
      number of neighbors and how neighbor features should be named.
    inputs: Dictionary of input tensors that may be missing neighbor features.
      The keys are the features names. See `utils.unpack_neighbor_features` for
      expected names of neighbor features and weights.
    weight_dtype: `tf.Dtype` for neighbors weights. Defaults to `tf.float32`.

  Returns:
    A dictionary of neighbor feature and weight tensors that do not already
    exist in `inputs`. The keys are specified according to `neighbor_config`.
  """
  known_names = set(inputs.keys())
  missing_inputs = {}
  for index in range(neighbor_config.max_neighbors):
    # Weight input for this potential neighbor.
    weight_name = '{}{}{}'.format(neighbor_config.prefix, index,
                                  neighbor_config.weight_suffix)
    if weight_name not in known_names:
      missing_inputs[weight_name] = tf.keras.Input((1,),
                                                   dtype=weight_dtype,
                                                   name=weight_name)

    # Replicate every non-neighbor input that has no neighbor counterpart yet.
    for feature_name, tensor in inputs.items():
      if feature_name.startswith(neighbor_config.prefix):
        continue
      neighbor_name = '{}{}_{}'.format(neighbor_config.prefix, index,
                                       feature_name)
      if neighbor_name in known_names:
        continue
      missing_inputs[neighbor_name] = tf.keras.Input(
          tensor.shape[1:],
          batch_size=tensor.shape[0],
          dtype=tensor.dtype,
          name=neighbor_name,
          ragged=isinstance(tensor, tf.RaggedTensor),
          sparse=isinstance(tensor, tf.sparse.SparseTensor))
  return missing_inputs

Source File: tensor_adapter_test.py From tfx-bsl with Apache License 2.0

4 votes

def _MakeRaggedTensorDTypesTestCases():
  """Builds one ragged-tensor test case per supported value type and list type.

  Returns:
    A list of dicts with the keys "testcase_name",
    "tensor_representation_textpb", "record_batch", "expected_ragged_tensor"
    and "expected_type_spec".
  """
  tensor_representation_textpb = """
  ragged_tensor {
    feature_path {
      step: "ragged_feature"
    }
  }
  """
  test_cases = []
  for value_type in _ALL_SUPPORTED_VALUE_TYPES:
    list_type_factories = (("list", pa.list_), ("large_list", pa.large_list))
    for list_type_name, make_list_type in list_type_factories:
      tf_dtype = _ARROW_TYPE_TO_TF_TYPE[value_type]
      expected_type_spec = tf.RaggedTensorSpec([None, None],
                                               tf_dtype,
                                               ragged_rank=1,
                                               row_splits_dtype=tf.int64)

      # The input rows include a None row and an empty row.
      if pa.types.is_integer(value_type):
        values = [[1, 2], None, [], [3]]
        expected_values = [1, 2, 3]
      elif pa.types.is_floating(value_type):
        values = [[1.0, 2.0], None, [], [3.0]]
        expected_values = [1.0, 2.0, 3.0]
      else:
        values = [[b"a", b"b"], None, [], [b"c"]]
        expected_values = [b"a", b"b", b"c"]
      row_splits = np.asarray([0, 2, 2, 2, 3], dtype=np.int64)

      # Eager mode expects a RaggedTensor, graph mode a RaggedTensorValue.
      if tf.executing_eagerly():
        expected_output = tf.RaggedTensor.from_row_splits(
            values=tf.constant(expected_values, dtype=tf_dtype),
            row_splits=row_splits)
      else:
        expected_output = tf.compat.v1.ragged.RaggedTensorValue(
            values=np.array(expected_values,
                            _ARROW_TYPE_TO_NP_TYPE[value_type]),
            row_splits=row_splits)

      testcase_name = "1D_{}_{}".format(value_type, list_type_name)
      record_batch = pa.RecordBatch.from_arrays(
          [pa.array(values, type=make_list_type(value_type))],
          ["ragged_feature"])
      test_cases.append({
          "testcase_name": testcase_name,
          "tensor_representation_textpb": tensor_representation_textpb,
          "record_batch": record_batch,
          "expected_ragged_tensor": expected_output,
          "expected_type_spec": expected_type_spec,
      })

  return test_cases

Python tensorflow.RaggedTensor() Examples