Python tensorflow.RaggedTensor() Examples
The following are 23
code examples of tensorflow.RaggedTensor().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: export.py From estimator with Apache License 2.0 | 6 votes |
def _check_tensor(tensor, name, error_label='feature'):
    """Validate that `tensor` is a Tensor, SparseTensor, or RaggedTensor.

    An object that exposes a `.tensor` attribute is also accepted, as long as
    `ops.convert_to_tensor` can convert it without raising `TypeError`.

    Args:
      tensor: The object to validate.
      name: Name inserted into the error message; omitted when falsy.
      error_label: Leading word of the error message describing `tensor`.

    Raises:
      ValueError: If `tensor` is none of the accepted kinds.
    """
    if (isinstance(tensor, tf.Tensor) or
            isinstance(tensor, tf.sparse.SparseTensor) or
            isinstance(tensor, tf.RaggedTensor)):
        return

    name_suffix = ' {}'.format(name) if name else ''
    rejection = ValueError('{}{} must be a Tensor, SparseTensor, or '
                           'RaggedTensor.'.format(error_label, name_suffix))

    # NOTE(ericmc): The `.tensor` check below is a specific carve-out for
    # LabeledTensor, which has a `.tensor` attribute and which is
    # convertible to tf.Tensor via ops.convert_to_tensor.
    # Allowing all types convertible to tf.Tensor is considered by soergel@
    # to be too permissive.
    # TODO(soergel): accept any type convertible to Tensor,
    # as in cl/193238295 snapshot #6.
    if not hasattr(tensor, 'tensor'):
        raise rejection
    try:
        ops.convert_to_tensor(tensor)
    except TypeError:
        raise rejection
Example #2
Source File: tokenizer.py From OpenNMT-tf with MIT License | 6 votes |
def tokenize(self, text):
    """Tokenizes text, running the tokenization under the ``cpu:0`` device.

    Args:
      text: A string or batch of strings to tokenize as a ``tf.Tensor`` or
        Python values.

    Returns:
      - If :obj:`text` is a Python string, a list of Python strings.
      - If :obj:`text` is a list of Python strings, a list of list of Python
        strings.
      - If :obj:`text` is a 0-D ``tf.Tensor``, a 1-D ``tf.Tensor``.
      - If :obj:`text` is a 1-D ``tf.Tensor``, a 2-D ``tf.RaggedTensor``.

    Raises:
      ValueError: if the rank of :obj:`text` is greater than 1.
    """
    with tf.device("cpu:0"):
        tokens = self._tokenize(text)
    return tokens
Example #3
Source File: tokenizer.py From OpenNMT-tf with MIT License | 6 votes |
def detokenize(self, tokens, sequence_length=None):
    """Detokenizes tokens, running the work under the ``cpu:0`` device.

    The Tensor version supports batches of tokens.

    Args:
      tokens: Tokens or batch of tokens as a ``tf.Tensor``,
        ``tf.RaggedTensor``, or Python values.
      sequence_length: The length of each sequence. Required if :obj:`tokens`
        is a dense 2-D ``tf.Tensor``.

    Returns:
      - If :obj:`tokens` is a list of list of Python strings, a list of Python
        strings.
      - If :obj:`tokens` is a list of Python strings, a Python string.
      - If :obj:`tokens` is a N-D ``tf.Tensor`` (or ``tf.RaggedTensor``), a
        (N-1)-D ``tf.Tensor``.

    Raises:
      ValueError: if the rank of :obj:`tokens` is greater than 2.
      ValueError: if :obj:`tokens` is a 2-D dense ``tf.Tensor`` and
        :obj:`sequence_length` is not set.
    """
    with tf.device("cpu:0"):
        text = self._detokenize(tokens, sequence_length)
    return text
Example #4
Source File: tokenizer.py From OpenNMT-tf with MIT License | 6 votes |
def _detokenize(self, tokens, sequence_length):
    """Dispatches detokenization on the type and rank of `tokens`.

    Ragged tensors are checked first, then other tensors, then nested Python
    lists; anything else is treated as a flat sequence of tokens.

    Raises:
      ValueError: if a tensor input has a rank other than 1 or 2, or if a
        dense 2-D tensor is given without `sequence_length`.
    """
    if isinstance(tokens, tf.RaggedTensor):
        ragged_rank = len(tokens.shape)
        if ragged_rank == 1:
            return self._detokenize_tensor(tokens.values)
        if ragged_rank == 2:
            return self._detokenize_ragged_tensor(tokens)
        raise ValueError(
            "Unsupported RaggedTensor rank %d for detokenization" % ragged_rank)

    if tf.is_tensor(tokens):
        dense_rank = len(tokens.shape)
        if dense_rank == 1:
            return self._detokenize_tensor(tokens)
        if dense_rank == 2:
            if sequence_length is None:
                raise ValueError(
                    "sequence_length is required for Tensor detokenization")
            return self._detokenize_batch_tensor(tokens, sequence_length)
        raise ValueError(
            "Unsupported tensor rank %d for detokenization" % dense_rank)

    # A non-empty list whose first item is itself a list is a batch: each
    # entry goes back through the public entry point.
    if isinstance(tokens, list) and tokens and isinstance(tokens[0], list):
        return [self.detokenize(sequence) for sequence in tokens]

    text_tokens = [tf.compat.as_text(token) for token in tokens]
    return self._detokenize_string(text_tokens)
Example #5
Source File: tensor_adapter_test.py From tfx-bsl with Apache License 2.0 | 6 votes |
def testRaggedTensor(self, tensor_representation_textpb, record_batch,
                     expected_type_spec, expected_ragged_tensor):
    """Checks that a record batch converts to the expected ragged output.

    The adapter is configured with a single "output" tensor representation
    parsed from `tensor_representation_textpb`.
    """
    representation = text_format.Parse(tensor_representation_textpb,
                                       schema_pb2.TensorRepresentation())
    config = tensor_adapter.TensorAdapterConfig(
        record_batch.schema, {"output": representation})
    adapter = tensor_adapter.TensorAdapter(config)

    batch_tensors = adapter.ToBatchTensors(record_batch)
    self.assertLen(batch_tensors, 1)
    self.assertIn("output", batch_tensors)

    output = batch_tensors["output"]
    ragged_types = (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)
    self.assertIsInstance(output, ragged_types)

    # The type-spec compatibility check is only performed in eager mode.
    if tf.executing_eagerly():
        mismatch_message = "{} is not compatible with spec {}".format(
            output, expected_type_spec)
        self.assertTrue(
            expected_type_spec.is_compatible_with(output), mismatch_message)

    self.assertRaggedAllEqual(output, expected_ragged_tensor)
    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
Example #6
Source File: saved_transform_io_test.py From transform with Apache License 2.0 | 5 votes |
def test_ragged_roundtrip(self):
    """Writes a ragged transform from a session and applies it in a new graph.

    The saved transform halves its ragged input; it is then applied to an
    input that was first multiplied by 10.
    """
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
        self.skipTest('This version of TensorFlow does not support '
                      'CompositeTenors in TensorInfo.')

    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Phase 1: build the transform graph and save it.
    with tf.compat.v1.Graph().as_default(), \
            tf.compat.v1.Session().as_default() as session:
        input_float = tf.compat.v1.ragged.placeholder(
            tf.float32, ragged_rank=1, value_shape=[])
        halved = input_float / 2.0
        saved_transform_io.write_saved_transform_from_session(
            session, {'input': input_float}, {'output': halved}, export_path)

    # Phase 2: load the transform into a fresh graph and run it.
    with tf.compat.v1.Graph().as_default(), \
            tf.compat.v1.Session().as_default() as session:
        splits = np.array([0, 2, 3], dtype=np.int64)
        values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
        input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

        # Using a computed input gives confidence that the graphs are fused
        scaled_inputs = {'input': input_ragged * 10}
        _, outputs = (
            saved_transform_io.partially_apply_saved_transform_internal(
                export_path, scaled_inputs))
        output_ragged = outputs['output']
        self.assertIsInstance(output_ragged, tf.RaggedTensor)
        result = session.run(output_ragged)

        # Row splits unchanged; values multiplied by 10 and divided by 2.
        self.assertAllEqual(splits, result.row_splits)
        self.assertEqual([5.0, 10.0, 20.0], result.values.tolist())
Example #7
Source File: util.py From model-analysis with Apache License 2.0 | 5 votes |
def wrap_tensor_or_dict_of_tensors_in_identity(
    tensor_or_dict_of_tensors: types.TensorTypeMaybeDict
) -> types.TensorTypeMaybeDict:
    # pyformat: disable
    """Wrap the given Tensor / dict of Tensors in tf.identity.

    Workaround for TensorFlow issue #17568 (b/71769512).

    Args:
      tensor_or_dict_of_tensors: Tensor or dict of Tensors to wrap around.

    Returns:
      Tensor or dict of Tensors wrapped with tf.identity. A dict input yields
      a new dict with the same keys.

    Raises:
      ValueError: We could not wrap the given Tensor / dict of Tensors in
        tf.identity.
    """
    # pyformat: enable

    def _identity_of(tensor: types.TensorType) -> types.TensorType:
        # Dense and ragged tensors are passed to tf.identity directly.
        if isinstance(tensor, (tf.Tensor, tf.RaggedTensor)):
            return tf.identity(tensor)
        if not isinstance(tensor, tf.SparseTensor):
            raise ValueError(
                'could not wrap Tensor %s in identity' % str(tensor))
        # A SparseTensor is rebuilt from identity-wrapped components.
        return tf.SparseTensor(
            indices=tf.identity(tensor.indices),
            values=tf.identity(tensor.values),
            dense_shape=tf.identity(tensor.dense_shape))

    if not isinstance(tensor_or_dict_of_tensors, dict):
        return _identity_of(tensor_or_dict_of_tensors)

    # Dictionary elements should only be Tensors (and not dictionaries).
    return {
        key: _identity_of(value)
        for key, value in tensor_or_dict_of_tensors.items()
    }
Example #8
Source File: graph_tools.py From transform with Apache License 2.0 | 5 votes |
def get_dependent_inputs(graph, input_tensors, output_tensors):
    """Returns the entries of input_tensors that output_tensors depend on.

    Args:
      graph: A `tf.Graph`. It could be the (intermediate) output tf graph in
        any transform phase (including phase 0 where no tensor replacement has
        yet happened).
      input_tensors: A dict of logical name to `tf.Tensor`, `tf.SparseTensor`,
        or `tf.RaggedTensor`. Logical name doesn't have any implications in
        this method and can be anything. In some cases it is the feature name
        corresponding to the input tensor.
      output_tensors: A dict of logical name to `tf.Tensor`,
        `tf.SparseTensor`, or `tf.RaggedTensor`, or a list of `tf.Tensor`,
        `tf.SparseTensor`, or `tf.RaggedTensor`.

    Returns:
      A dict of logical name to `tf.Tensor`, `tf.SparseTensor`, or
      `tf.RaggedTensor` that are filtered from input_tensors (transitively)
      producing output_tensors
    """
    outputs = (
        output_tensors if isinstance(output_tensors, list)
        else six.itervalues(output_tensors))

    # Since this method may be called before all tensor replacements are
    # ready, to fulfill the precondition of InitializableGraphAnalyzer, we
    # fake the readiness of tensor replacements. Note that the readiness of
    # replacement tensors doesn't affect the correctness of dependencies
    # tracing.
    sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
    sink_readiness = [(sink.tensor, False) for sink in sinks]
    analyzer = InitializableGraphAnalyzer(graph, input_tensors, sink_readiness)

    needed = {}
    for output in outputs:
        needed.update(analyzer.get_dependent_inputs(output))

    filtered = {}
    for name, tensor in six.iteritems(input_tensors):
        if name in needed:
            filtered[name] = tensor
    return filtered
Example #9
Source File: graph_tools.py From transform with Apache License 2.0 | 5 votes |
def get_dependent_inputs(self, tensor_or_op):
    """Gets the inputs that the given `tensor_or_op` transitively depends on.

    Args:
      tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`.

    Returns:
      A dict of name to `Tensor`, `SparseTensor`, or `RaggedTensor` (sub-dict
      of `input_signature`) that the given `tensor_or_op` depends on.

    Raises:
      TypeError: If `tensor_or_op` is of an unsupported type.
    """
    supported_types = (
        tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
        raise TypeError(
            'Expected Tensor, SparseTensor, RaggedTensor or Operation got {} of '
            'type {}'.format(tensor_or_op, type(tensor_or_op)))

    # Union of the dependent sources of every component of the argument.
    dependents = set()
    for component in _decompose_tensor_or_op(tensor_or_op):
        analysis = self._graph_analyzer.analyze_tensor(component)
        dependents.update(analysis.dependent_sources)

    def _is_dependency(tensor):
        # An input counts as soon as any one of its components is a source.
        return any(
            tf_utils.hashable_tensor_or_op(part) in dependents
            for part in _decompose_tensor_or_op(tensor))

    return {
        name: tensor
        for name, tensor in six.iteritems(self._input_signature)
        if _is_dependency(tensor)
    }
Example #10
Source File: graph_tools.py From transform with Apache License 2.0 | 5 votes |
def _make_source_infos_dict(self, input_signature, replaced_tensors_ready):
    """Builds a dictionary from source tensors to _SourceInfos.

    This dictionary stores information about the sources of the graph.
    Each tensor in replaced_tensors_ready is a source whose readiness is known
    and has no name. Each tensor (or component of a tensor) in input_signature
    is ready to run and has a name determined by the signature.

    Args:
      input_signature: A dict whose keys are strings and values are `Tensor`s,
        `SparseTensor`s, or `RaggedTensor`s.
      replaced_tensors_ready: a dict from `Tensor`, `SparseTensor`s, or
        `RaggedTensor`s to bool indicating whether the tensor is ready in this
        phase.

    Returns:
      a dictionary from source tensors to _SourceInfos.

    Raises:
      TypeError: If a value of `input_signature` is neither a `tf.Tensor` nor
        a `CompositeTensor`.
    """
    source_infos = {}

    # Replaced tensors: readiness is given, and they carry no name.
    for tensor_or_op, is_ready in six.iteritems(replaced_tensors_ready):
        dereferenced = tf_utils.deref_tensor_or_op(tensor_or_op)
        for component in _decompose_tensor_or_op(dereferenced):
            info = _SourceInfo(is_ready, None)
            source_infos[tf_utils.hashable_tensor_or_op(component)] = info

    # Signature inputs: always ready, named after the signature key.
    for name, tensor in six.iteritems(input_signature):
        if isinstance(tensor, tf.Tensor):
            _set_unique_value_in_dict(
                source_infos, tensor,
                _SourceInfo(True, '{}$tensor'.format(name)))
            continue

        if not isinstance(tensor, composite_tensor.CompositeTensor):
            raise TypeError(
                'Expected Tensor, or CompositeTensor, got {} of type {}'.format(
                    tensor, type(tensor)))

        for idx, tensor_component in enumerate(_decompose_tensor_or_op(tensor)):
            _set_unique_value_in_dict(
                source_infos, tensor_component,
                _SourceInfo(True, '{}$composite_tensor_{}'.format(name, idx)))

    return source_infos
Example #11
Source File: graph_tools.py From transform with Apache License 2.0 | 5 votes |
def ready_to_run(self, tensor_or_op):
    """Determine if a given tensor or op is ready to run.

    A tensor is ready to run if every tensor in all its transitive
    dependencies are set to `True` in `known_ready`.

    Note that if a placeholder is encountered, this will result in an error as
    it is assumed that all placeholders are keys in `known_ready`. This is to
    avoid unexpected behavior when the user creates placeholders (as opposed
    to placeholders created by the tf.Transform framework).

    Similarly encountering a Table op is an error because a table should be a
    key in `known_ready` (in the case of analyzing the main session run) or
    should not be encountered (in the case of analyzing the graph init run).

    Args:
      tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`

    Returns:
      A bool indicating whether the tensor is ready to run.

    Raises:
      TypeError: If `tensor_or_op` is not one of the supported types.
      ValueError: If a placeholder or table is encountered.
      _UnexpectedTableError: If an initializable table op is encountered.
      _UnexpectedPlaceholderError: If a placeholder is encountered.
    """
    supported_types = (
        tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
        raise TypeError(
            'Expected Tensor, SparseTensor, RaggedTensor, or Operation got {} of type {}'
            .format(tensor_or_op, type(tensor_or_op)))

    # Stop at the first component that is not ready.
    for component in _decompose_tensor_or_op(tensor_or_op):
        if not self.analyze_tensor(component).is_ready_to_run:
            return False
    return True
Example #12
Source File: saved_transform_io_v2_test.py From transform with Apache License 2.0 | 5 votes |
def test_ragged_roundtrip(self):
    """Writes a ragged transform from a v1 session and applies it via the v2 loader.

    The saved transform halves its ragged input; it is then applied to an
    input that was first multiplied by 10.
    """
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
        self.skipTest('This version of TensorFlow does not support '
                      'CompositeTenors in TensorInfo.')

    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build the transform graph in a v1 graph/session and save it.
    with tf.compat.v1.Graph().as_default(), \
            tf.compat.v1.Session().as_default() as session:
        input_float = tf.compat.v1.ragged.placeholder(
            tf.float32, ragged_rank=1, value_shape=[])
        halved = input_float / 2.0
        saved_transform_io.write_saved_transform_from_session(
            session, {'input': input_float}, {'output': halved}, export_path)

    splits = np.array([0, 2, 3], dtype=np.int64)
    values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
    input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

    # Using a computed input gives confidence that the graphs are fused
    scaled_inputs = {'input': input_ragged * 10}
    loader = saved_transform_io_v2.SavedModelLoader(export_path)
    outputs = loader.apply_v1_transform_model_in_v2(scaled_inputs)
    result = outputs['output']
    self.assertIsInstance(result, tf.RaggedTensor)

    # Row splits unchanged; values multiplied by 10 and divided by 2.
    self.assertAllEqual(splits, result.row_splits)
    self.assertEqual([5.0, 10.0, 20.0], result.values.numpy().tolist())
Example #13
Source File: text_inputter.py From OpenNMT-tf with MIT License | 5 votes |
def add_sequence_controls(ids, length, start_id=None, end_id=None):
    """Adds sequence control tokens.

    Args:
      ids: Sequence of ids as 1D or 2D (batch) tensor.
      length: Sequence length as 0D or 1D (batch) tensor.
      start_id: Id to prepend to the sequence (set ``None`` to disable).
      end_id: Id to append to the sequence (set ``None`` to disable).

    Returns:
      A tuple ``(ids, length)``. ``length`` is increased by one for each
      control token that was added; the argument itself is not modified.

    Raises:
      ValueError: if the static rank of :obj:`ids` is not 1 or 2 (including
        when it is unknown).
    """
    rank = ids.shape.rank
    if rank not in (1, 2):
        # %s rather than %d so that an unknown rank (None) is reported as the
        # intended ValueError instead of failing inside the string formatting.
        raise ValueError("Unsupported rank %s (expected 1 or 2)" % rank)
    batch_size = tf.shape(ids)[0] if rank == 2 else None

    def _make_column(value):
        # Builds the values to concatenate along the last axis: shape [1]
        # for a single sequence, [batch_size, 1] for a batch.
        value = tf.constant(value, dtype=ids.dtype)
        if batch_size is not None:
            value = tf.fill([batch_size], value)
        return tf.expand_dims(value, -1)

    if start_id is not None:
        # Use the id supplied by the caller, not a hard-coded constant.
        start_ids = _make_column(start_id)
        ids = tf.concat([start_ids, ids], axis=-1)
        # Rebind instead of `+=` so a caller-owned array is never mutated.
        length = length + 1

    if end_id is not None:
        end_ids = _make_column(end_id)
        if batch_size is not None:
            # Run concat on RaggedTensor to handle sequences with variable
            # length.
            ids = tf.RaggedTensor.from_tensor(ids, lengths=length)
        ids = tf.concat([ids, end_ids], axis=-1)
        if batch_size is not None:
            ids = ids.to_tensor()
        length = length + 1

    return ids, length
Example #14
Source File: mappers.py From transform with Apache License 2.0 | 5 votes |
def word_count(tokens, name=None):
    """Find the token count of each document/row.

    `tokens` is either a `RaggedTensor` or `SparseTensor`, representing
    tokenized strings. This function simply returns size of each row, so the
    dtype is not constrained to string.

    Args:
      tokens: either
        (1) a two-dimensional `SparseTensor`, or
        (2) a `RaggedTensor` with ragged rank of 1, non-ragged rank of 1
        of dtype `tf.string` containing tokens to be counted
      name: (Optional) A name for this operation.

    Returns:
      A one-dimensional `Tensor` holding the token counts of each row.

    Raises:
      ValueError: if tokens is neither sparse nor ragged
    """
    with tf.compat.v1.name_scope(name, 'word_count'):
        if isinstance(tokens, tf.RaggedTensor):
            return tokens.row_lengths()
        if not isinstance(tokens, tf.SparseTensor):
            raise ValueError('Invalid token tensor')

        # Replace every stored value with 1 and sum along axis 1.
        ones = tf.SparseTensor(
            indices=tokens.indices,
            values=tf.ones_like(tokens.values, dtype=tf.int64),
            dense_shape=tokens.dense_shape)
        counts = tf.sparse.reduce_sum(ones, axis=1)
        counts.set_shape([tokens.shape[0]])
        return counts
Example #15
Source File: tokenizer_test.py From OpenNMT-tf with MIT License | 5 votes |
def _testTokenizerOnBatchTensor(self, tokenizer, text, ref_tokens):
    """Checks that tokenizing a batch tensor yields the reference tokens.

    The result must be a `tf.RaggedTensor` whose nested list form equals
    `ref_tokens` converted to bytes.
    """
    batch = tf.constant(text)
    tokens = tokenizer.tokenize(batch)
    self.assertIsInstance(tokens, tf.RaggedTensor)
    actual = tokens.to_list()
    expected = tf.nest.map_structure(tf.compat.as_bytes, ref_tokens)
    self.assertAllEqual(actual, expected)
Example #16
Source File: text.py From OpenNMT-tf with MIT License | 5 votes |
def tokens_to_words(tokens, subword_token="■", is_spacer=None):
    """Converts a sequence of tokens to a sequence of words.

    Example:

      >>> opennmt.data.tokens_to_words(["He@@", "llo", "W@@", "orld", "@@!"], subword_token="@@")
      <tf.RaggedTensor [[b'He@@', b'llo'], [b'W@@', b'orld', b'@@!']]>

    Args:
      tokens: A 1D string ``tf.Tensor``.
      subword_token: The special token used by the subword tokenizer.
      is_spacer: Whether :obj:`subword_token` is used as a spacer (as in
        SentencePiece) or a joiner (as in BPE). If ``None``, will infer
        directly from :obj:`subword_token`.

    Returns:
      The words as a 2D string ``tf.RaggedTensor``.
    """
    if is_spacer is None:
        is_spacer = subword_token == "▁"

    if is_spacer:
        # First token implicitly starts with a spacer.
        starts_with_marker = tf.strings.regex_full_match(
            tokens, "%s.*" % subword_token)
        is_first_token = tf.one_hot(
            0, tf.shape(tokens)[0], on_value=True, off_value=False)
        left_and_single = tf.logical_or(starts_with_marker, is_first_token)
        ends_with_marker = tf.strings.regex_full_match(
            tokens, ".+%s" % subword_token)
        previous_ends_with_marker = tf.roll(ends_with_marker, shift=1, axis=0)
        word_start = tf.logical_or(previous_ends_with_marker, left_and_single)
    else:
        ends_with_marker = tf.strings.regex_full_match(
            tokens, ".*%s" % subword_token)
        starts_with_marker = tf.strings.regex_full_match(
            tokens, "%s.*" % subword_token)
        previous_ends_with_marker = tf.roll(ends_with_marker, shift=1, axis=0)
        # A token continues a word when the previous token ends with the
        # marker or when it starts with the marker itself.
        continues_word = tf.logical_or(
            previous_ends_with_marker, starts_with_marker)
        word_start = tf.logical_not(continues_word)

    row_starts = tf.squeeze(tf.where(word_start), -1)
    return tf.RaggedTensor.from_row_starts(tokens, row_starts)
Example #17
Source File: text.py From OpenNMT-tf with MIT License | 5 votes |
def tokens_to_chars(tokens):
    """Splits tokens into unicode characters.

    Example:

      >>> opennmt.data.tokens_to_chars(["hello", "world"])
      <tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'w', b'o', b'r', b'l', b'd']]>

    Args:
      tokens: A string ``tf.Tensor`` of shape :math:`[T]`.

    Returns:
      The characters as a 2D string ``tf.RaggedTensor``.
    """
    chars = tf.strings.unicode_split(tokens, "UTF-8")
    return chars
Example #18
Source File: tokenizer.py From OpenNMT-tf with MIT License | 5 votes |
def _detokenize_batch_tensor(self, tokens, sequence_length):
    """Detokenizes a dense batch by converting it to a ``tf.RaggedTensor``.

    The ragged tensor is built from `tokens` with `sequence_length` as the
    row lengths and handed to `_detokenize_ragged_tensor`.
    """
    return self._detokenize_ragged_tensor(
        tf.RaggedTensor.from_tensor(tokens, lengths=sequence_length))
Example #19
Source File: tokenizer.py From OpenNMT-tf with MIT License | 5 votes |
def _detokenize_ragged_tensor(self, tokens): """Detokenizes a batch of tokens as a ``tf.RaggedTensor`` When not overriden, this default implementation calls _detokenize_batch_tensor on the dense representation. Args: tokens: A 2-D ``tf.RaggedTensor``. Returns: A 1-D string ``tf.Tensor``. """ return self._detokenize_batch_tensor(tokens.to_tensor(), tokens.row_lengths())
Example #20
Source File: text_inputter.py From OpenNMT-tf with MIT License | 5 votes |
def make_features(self, element=None, features=None, training=None):
    """Tokenizes raw text.

    If `features` already contains "tokens" it is returned untouched.
    Otherwise the text (taken from `features["text"]` when present, else
    `element`) is tokenized and "tokens" and "length" are stored in
    `features`. During training with a configured noiser, the noisy version
    either randomly replaces the clean one (in-place noise) or is stored
    under "noisy_"-prefixed keys.
    """
    features = {} if features is None else features
    if "tokens" in features:
        return features

    if "text" in features:
        # Note: this removes "text" from the dict passed in by the caller.
        element = features.pop("text")

    tokens = self.tokenizer.tokenize(element)
    if isinstance(tokens, tf.RaggedTensor):
        length = tokens.row_lengths()
        tokens = tokens.to_tensor()
    else:
        length = tf.shape(tokens)[0]

    if training and self.noiser is not None:
        noisy_tokens, noisy_length = self.noiser(tokens, keep_shape=False)
        if self.in_place_noise:
            apply_noise = tf.random.uniform([]) < self.noise_probability
            tokens, length = tf.cond(
                apply_noise,
                true_fn=lambda: (noisy_tokens, noisy_length),
                false_fn=lambda: (tokens, length))
        else:
            # Call make_features again to fill the remaining noisy features.
            noisy_features = self.make_features(
                features=dict(tokens=noisy_tokens, length=noisy_length),
                training=training)
            features.update(
                ("noisy_%s" % key, value)
                for key, value in noisy_features.items())

    features["length"] = length
    features["tokens"] = tokens
    return features
Example #21
Source File: schema_inference.py From transform with Apache License 2.0 | 4 votes |
def _feature_spec_from_batched_tensors(tensors): """Infer a feature spec from a dict of tensors. Args: tensors: A dict whose keys are strings and values are `Tensor` or `SparseTensor`s. Returns: A feature spec inferred from the types and shapes of the tensors. Raises: ValueError: If the feature spec cannot be inferred. TypeError: If any of the values of `tensors` are not a `Tensor` or `SparseTensor`. """ feature_spec = {} for name, tensor in six.iteritems(tensors): tensor = tensors[name] if tensor.dtype not in (tf.string, tf.int64, tf.float32): raise ValueError('Feature {} ({}) had invalid dtype {} for feature spec' .format(name, tensor, tensor.dtype)) if isinstance(tensor, tf.SparseTensor): shape = tensor.get_shape() if shape.ndims != 2: raise ValueError( 'Feature {} ({}) had invalid shape {} for VarLenFeature: must have ' 'rank 2'.format(name, tensor, shape)) feature_spec[name] = tf.io.VarLenFeature(tensor.dtype) elif isinstance(tensor, tf.Tensor): shape = tensor.get_shape() if shape.ndims in [None, 0]: raise ValueError( 'Feature {} ({}) had invalid shape {} for FixedLenFeature: must ' 'have rank at least 1'.format(name, tensor, shape)) if any(dim is None for dim in shape.as_list()[1:]): raise ValueError( 'Feature {} ({}) had invalid shape {} for FixedLenFeature: apart ' 'from the batch dimension, all dimensions must have known size' .format(name, tensor, shape)) feature_spec[name] = tf.io.FixedLenFeature(shape.as_list()[1:], tensor.dtype) elif isinstance(tensor, tf.RaggedTensor): tf.compat.v1.logging.warn( 'Feature %s was a RaggedTensor. A Schema will be generated but the ' 'Schema cannot be used with a coder (e.g. to materialize output ' 'data) or to generated a feature spec.', name) # Arbitrarily select VarLenFeature. feature_spec[name] = tf.io.VarLenFeature(tensor.dtype) else: raise TypeError( 'Expected a Tensor or SparseTensor, got {} of type {} for feature {}' .format(tensor, type(tensor), name)) return feature_spec
Example #22
Source File: neighbor_features.py From neural-structured-learning with Apache License 2.0 | 4 votes |
def make_missing_neighbor_inputs(neighbor_config, inputs, weight_dtype=tf.float32):
    """Makes additional inputs for neighbor features if necessary.

    Args:
      neighbor_config: An instance of `configs.GraphNeighborConfig` specifying
        the number of neighbors and how neighbor features should be named.
      inputs: Dictionary of input tensors that may be missing neighbor
        features. The keys are the features names. See
        `utils.unpack_neighbor_features` for expected names of neighbor
        features and weights.
      weight_dtype: `tf.Dtype` for neighbors weights. Defaults to
        `tf.float32`.

    Returns:
      A dictionary of neighbor feature and weight tensors that do not already
      exist in `inputs`. The keys are specified according to
      `neighbor_config`.
    """
    known_names = set(inputs.keys())
    missing_inputs = {}

    # For each potential neighbor.
    for index in range(neighbor_config.max_neighbors):
        # Weight of the neighbor.
        weight_name = '{}{}{}'.format(neighbor_config.prefix, index,
                                      neighbor_config.weight_suffix)
        if weight_name not in known_names:
            missing_inputs[weight_name] = tf.keras.Input(
                (1,), dtype=weight_dtype, name=weight_name)

        # For inputs without existing neighbor features, replicate them.
        for feature_name, tensor in inputs.items():
            # Inputs that are themselves neighbor features are not replicated.
            if feature_name.startswith(neighbor_config.prefix):
                continue
            neighbor_feature_name = '{}{}_{}'.format(neighbor_config.prefix,
                                                     index, feature_name)
            if neighbor_feature_name in known_names:
                continue
            missing_inputs[neighbor_feature_name] = tf.keras.Input(
                tensor.shape[1:],
                batch_size=tensor.shape[0],
                dtype=tensor.dtype,
                name=neighbor_feature_name,
                ragged=isinstance(tensor, tf.RaggedTensor),
                sparse=isinstance(tensor, tf.sparse.SparseTensor))

    return missing_inputs
Example #23
Source File: tensor_adapter_test.py From tfx-bsl with Apache License 2.0 | 4 votes |
def _MakeRaggedTensorDTypesTestCases():
    """Builds ragged-tensor test cases for every supported value type.

    One case is produced per (value type, Arrow list type) pair. Each case is
    a dict with the test case name, the tensor representation text proto, the
    input record batch, the expected ragged output and the expected type spec.
    """
    test_cases = []
    tensor_representation_textpb = """ ragged_tensor { feature_path { step: "ragged_feature" } } """
    list_type_factories = (("list", pa.list_), ("large_list", pa.large_list))

    for value_type in _ALL_SUPPORTED_VALUE_TYPES:
        for factory_name, make_list_type in list_type_factories:
            expected_type_spec = tf.RaggedTensorSpec(
                [None, None],
                _ARROW_TYPE_TO_TF_TYPE[value_type],
                ragged_rank=1,
                row_splits_dtype=tf.int64)

            # Rows 1 and 2 (a null and an empty list) contribute no values.
            if pa.types.is_integer(value_type):
                values = [[1, 2], None, [], [3]]
                expected_values = [1, 2, 3]
            elif pa.types.is_floating(value_type):
                values = [[1.0, 2.0], None, [], [3.0]]
                expected_values = [1.0, 2.0, 3.0]
            else:
                values = [[b"a", b"b"], None, [], [b"c"]]
                expected_values = [b"a", b"b", b"c"]
            row_splits = np.asarray([0, 2, 2, 2, 3], dtype=np.int64)

            # Eager mode expects a RaggedTensor; otherwise a RaggedTensorValue.
            if tf.executing_eagerly():
                expected_output = tf.RaggedTensor.from_row_splits(
                    values=tf.constant(
                        expected_values,
                        dtype=_ARROW_TYPE_TO_TF_TYPE[value_type]),
                    row_splits=row_splits)
            else:
                expected_output = tf.compat.v1.ragged.RaggedTensorValue(
                    values=np.array(expected_values,
                                    _ARROW_TYPE_TO_NP_TYPE[value_type]),
                    row_splits=row_splits)

            record_batch = pa.RecordBatch.from_arrays(
                [pa.array(values, type=make_list_type(value_type))],
                ["ragged_feature"])
            test_cases.append({
                "testcase_name": "1D_{}_{}".format(value_type, factory_name),
                "tensor_representation_textpb": tensor_representation_textpb,
                "record_batch": record_batch,
                "expected_ragged_tensor": expected_output,
                "expected_type_spec": expected_type_spec,
            })

    return test_cases