Python tensorflow.RaggedTensor() Examples
The following are 23 code examples of tensorflow.RaggedTensor().
Example #1
Source File: From estimator with Apache License 2.0 | 6 votes |
def _check_tensor(tensor, name, error_label='feature'):
    """Validate that `tensor` is a Tensor, SparseTensor or RaggedTensor.

    Args:
        tensor: The object to validate.
        name: Optional name inserted into the error message; skipped when falsy.
        error_label: Leading label of the error message.

    Raises:
        ValueError: If `tensor` is none of the accepted types and either has no
            `tensor` attribute or `ops.convert_to_tensor` rejects it with a
            `TypeError`.
    """
    accepted_types = (tf.Tensor, tf.sparse.SparseTensor, tf.RaggedTensor)
    if isinstance(tensor, accepted_types):
        return

    name_suffix = ' {}'.format(name) if name else ''
    rejection = ValueError(
        '{}{} must be a Tensor, SparseTensor, or RaggedTensor.'.format(
            error_label, name_suffix))

    # NOTE(ericmc): Objects exposing a `.tensor` attribute (LabeledTensor) get
    # a specific carve-out: they are accepted when ops.convert_to_tensor can
    # convert them. Allowing every type convertible to tf.Tensor is considered
    # by soergel@ to be too permissive.
    # TODO(soergel): accept any type convertible to Tensor,
    # as in cl/193238295 snapshot #6.
    if not hasattr(tensor, 'tensor'):
        raise rejection
    try:
        ops.convert_to_tensor(tensor)
    except TypeError:
        raise rejection
Example #2
Source File: From OpenNMT-tf with MIT License | 6 votes |
def tokenize(self, text):
    """Tokenizes text by delegating to ``self._tokenize`` under ``tf.device("cpu:0")``.

    Args:
        text: A string or batch of strings to tokenize as a ``tf.Tensor`` or
            Python values.

    Returns:
        - If :obj:`text` is a Python string, a list of Python strings.
        - If :obj:`text` is a list of Python strings, a list of list of Python
          strings.
        - If :obj:`text` is a 0-D ``tf.Tensor``, a 1-D ``tf.Tensor``.
        - If :obj:`text` is a 1-D ``tf.Tensor``, a 2-D ``tf.RaggedTensor``.

    Raises:
        ValueError: if the rank of :obj:`text` is greater than 1.
    """
    with tf.device("cpu:0"):
        tokenized = self._tokenize(text)
    return tokenized
Example #3
Source File: From OpenNMT-tf with MIT License | 6 votes |
def detokenize(self, tokens, sequence_length=None):
    """Detokenizes tokens by delegating to ``self._detokenize`` under ``tf.device("cpu:0")``.

    The Tensor version supports batches of tokens.

    Args:
        tokens: Tokens or batch of tokens as a ``tf.Tensor``,
            ``tf.RaggedTensor``, or Python values.
        sequence_length: The length of each sequence. Required if
            :obj:`tokens` is a dense 2-D ``tf.Tensor``.

    Returns:
        - If :obj:`tokens` is a list of list of Python strings, a list of
          Python strings.
        - If :obj:`tokens` is a list of Python strings, a Python string.
        - If :obj:`tokens` is a N-D ``tf.Tensor`` (or ``tf.RaggedTensor``), a
          (N-1)-D ``tf.Tensor``.

    Raises:
        ValueError: if the rank of :obj:`tokens` is greater than 2.
        ValueError: if :obj:`tokens` is a 2-D dense ``tf.Tensor`` and
            :obj:`sequence_length` is not set.
    """
    with tf.device("cpu:0"):
        detokenized = self._detokenize(tokens, sequence_length)
    return detokenized
Example #4
Source File: From OpenNMT-tf with MIT License | 6 votes |
def _detokenize(self, tokens, sequence_length):
    """Dispatches detokenization on the type and rank of `tokens`.

    Args:
        tokens: A ``tf.RaggedTensor``, a tensor (per ``tf.is_tensor``), a list
            of lists, or any other iterable of tokens.
        sequence_length: Length of each sequence; only read for rank-2 dense
            tensors.

    Returns:
        The result of the matching ``_detokenize_*`` helper, or a list of
        ``self.detokenize`` results when `tokens` is a non-empty list whose
        first item is a list.

    Raises:
        ValueError: if a tensor input has a rank other than 1 or 2, or if a
            rank-2 dense tensor is given without `sequence_length`.
    """
    if isinstance(tokens, tf.RaggedTensor):
        num_dims = len(tokens.shape)
        if num_dims == 1:
            # Only the flat values are forwarded for a rank-1 ragged tensor.
            return self._detokenize_tensor(tokens.values)
        if num_dims == 2:
            return self._detokenize_ragged_tensor(tokens)
        raise ValueError("Unsupported RaggedTensor rank %d for detokenization" % num_dims)

    if tf.is_tensor(tokens):
        num_dims = len(tokens.shape)
        if num_dims == 1:
            return self._detokenize_tensor(tokens)
        if num_dims != 2:
            raise ValueError("Unsupported tensor rank %d for detokenization" % num_dims)
        if sequence_length is None:
            raise ValueError("sequence_length is required for Tensor detokenization")
        return self._detokenize_batch_tensor(tokens, sequence_length)

    # Python values: a batch is recognised by looking at the first item only.
    if isinstance(tokens, list) and tokens and isinstance(tokens[0], list):
        return [self.detokenize(sequence) for sequence in tokens]

    text_tokens = [tf.compat.as_text(token) for token in tokens]
    return self._detokenize_string(text_tokens)
Example #5
Source File: From tfx-bsl with Apache License 2.0 | 6 votes |
def testRaggedTensor(self, tensor_representation_textpb, record_batch,
                     expected_type_spec, expected_ragged_tensor):
    """Checks that a TensorAdapter converts `record_batch` to the expected ragged output.

    Args:
        tensor_representation_textpb: Text-format `TensorRepresentation` proto.
        record_batch: Input record batch; its `schema` configures the adapter.
        expected_type_spec: Spec the output must be compatible with (only
            checked when executing eagerly).
        expected_ragged_tensor: Expected value of the single "output" tensor.
    """
    representation = text_format.Parse(tensor_representation_textpb,
                                       schema_pb2.TensorRepresentation())
    adapter_config = tensor_adapter.TensorAdapterConfig(
        record_batch.schema, {"output": representation})
    adapter = tensor_adapter.TensorAdapter(adapter_config)

    batch_tensors = adapter.ToBatchTensors(record_batch)
    self.assertLen(batch_tensors, 1)
    self.assertIn("output", batch_tensors)

    output = batch_tensors["output"]
    ragged_types = (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)
    self.assertIsInstance(output, ragged_types)

    if tf.executing_eagerly():
        is_compatible = expected_type_spec.is_compatible_with(output)
        self.assertTrue(
            is_compatible,
            "{} is not compatible with spec {}".format(output, expected_type_spec))

    self.assertRaggedAllEqual(output, expected_ragged_tensor)
    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
Example #6
Source File: From transform with Apache License 2.0 | 5 votes |
def test_ragged_roundtrip(self):
    """Round-trips a ragged input through a saved transform in TF1 session mode.

    A graph that halves a ragged float placeholder is exported, then applied
    in a fresh graph to a ragged input that was first multiplied by 10. The
    test is skipped when `TensorInfo` has no `CompositeTensor` attribute.
    """
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
        self.skipTest('This version of TensorFlow does not support '
                      'CompositeTenors in TensorInfo.')
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Phase 1: build and export the transform graph.
    with tf.compat.v1.Graph().as_default():
        with tf.compat.v1.Session().as_default() as session:
            input_float = tf.compat.v1.ragged.placeholder(
                tf.float32, ragged_rank=1, value_shape=[])
            output = input_float / 2.0
            inputs = {'input': input_float}
            outputs = {'output': output}
            saved_transform_io.write_saved_transform_from_session(
                session, inputs, outputs, export_path)

    # Phase 2: re-apply the exported transform in a fresh graph.
    with tf.compat.v1.Graph().as_default():
        with tf.compat.v1.Session().as_default() as session:
            splits = np.array([0, 2, 3], dtype=np.int64)
            values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
            input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

            # Using a computed input gives confidence that the graphs are fused
            inputs = {'input': input_ragged * 10}
            _, outputs = (
                saved_transform_io.partially_apply_saved_transform_internal(
                    export_path, inputs))
            output_ragged = outputs['output']
            self.assertIsInstance(output_ragged, tf.RaggedTensor)

            # The assertions below read `.row_splits` and call
            # `.values.tolist()`, so the symbolic ragged tensor has to be
            # evaluated in the session first.
            result = session.run(output_ragged)

            # indices and shape unchanged; values multiplied by 10 and divided by 2
            self.assertAllEqual(splits, result.row_splits)
            self.assertEqual([5.0, 10.0, 20.0], result.values.tolist())
Example #7
Source File: From model-analysis with Apache License 2.0 | 5 votes |
def wrap_tensor_or_dict_of_tensors_in_identity(
    tensor_or_dict_of_tensors: types.TensorTypeMaybeDict
) -> types.TensorTypeMaybeDict:  # pyformat: disable
    """Wrap the given Tensor / dict of Tensors in tf.identity.

    Workaround for TensorFlow issue #17568 (b/71769512).

    Args:
        tensor_or_dict_of_tensors: Tensor or dict of Tensors to wrap around.
            Dict values must themselves be tensors, not nested dicts.

    Returns:
        Tensor or dict of Tensors wrapped with tf.identity. A dict input yields
        a new dict with the same keys.

    Raises:
        ValueError: We could not wrap the given Tensor / dict of Tensors in
            tf.identity.
    """
    # pyformat: enable

    def _identity(tensor: types.TensorType) -> types.TensorType:
        # A SparseTensor is rebuilt from identity-wrapped components.
        if isinstance(tensor, tf.SparseTensor):
            return tf.SparseTensor(
                indices=tf.identity(tensor.indices),
                values=tf.identity(tensor.values),
                dense_shape=tf.identity(tensor.dense_shape))
        if isinstance(tensor, (tf.Tensor, tf.RaggedTensor)):
            return tf.identity(tensor)
        raise ValueError('could not wrap Tensor %s in identity' % str(tensor))

    if not isinstance(tensor_or_dict_of_tensors, dict):
        return _identity(tensor_or_dict_of_tensors)

    return {
        key: _identity(value)
        for key, value in tensor_or_dict_of_tensors.items()
    }
Example #8
Source File: From transform with Apache License 2.0 | 5 votes |
def get_dependent_inputs(graph, input_tensors, output_tensors):
    """Returns tensors in input_tensors that (transitively) produce output_tensors.

    Args:
        graph: A `tf.Graph`. It could be the (intermediate) output tf graph in
            any transform phase (including phase 0 where no tensor replacement
            has yet happened).
        input_tensors: A dict of logical name to `tf.Tensor`,
            `tf.SparseTensor`, or `tf.RaggedTensor`. The logical name has no
            implications in this method and can be anything.
        output_tensors: A dict of logical name to `tf.Tensor`,
            `tf.SparseTensor`, or `tf.RaggedTensor`, or a list of such tensors.

    Returns:
        A dict of logical name to tensor: the entries of `input_tensors` whose
        names appear among the dependencies of any output tensor.
    """
    if isinstance(output_tensors, list):
        outputs = output_tensors
    else:
        outputs = six.itervalues(output_tensors)

    # This method may be called before all tensor replacements are ready. To
    # fulfill the precondition of InitializableGraphAnalyzer, every replacement
    # is reported as not ready; readiness does not affect the correctness of
    # dependency tracing.
    replacement_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
    sinks_marked_not_ready = [(sink.tensor, False) for sink in replacement_sinks]
    analyzer = InitializableGraphAnalyzer(graph, input_tensors,
                                          sinks_marked_not_ready)

    required = {}
    for output in outputs:
        required.update(analyzer.get_dependent_inputs(output))

    filtered = {}
    for name, tensor in six.iteritems(input_tensors):
        if name in required:
            filtered[name] = tensor
    return filtered
Example #9
Source File: From transform with Apache License 2.0 | 5 votes |
def get_dependent_inputs(self, tensor_or_op):
    """Gets the inputs that the given `tensor_or_op` transitively depends on.

    Args:
        tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`.

    Returns:
        A dict of name to `Tensor`, `SparseTensor`, or `RaggedTensor` (sub-dict
        of `self._input_signature`): the entries for which at least one
        decomposed component is among the dependent sources of `tensor_or_op`.

    Raises:
        TypeError: If `tensor_or_op` is of an unsupported type.
    """
    supported_types = (tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
        raise TypeError(
            'Expected Tensor, SparseTensor, RaggedTensor or Operation got {} of '
            'type {}'.format(tensor_or_op, type(tensor_or_op)))

    # Union of the sources reachable from every component of the argument.
    dependent_sources = set()
    for part in _decompose_tensor_or_op(tensor_or_op):
        analysis = self._graph_analyzer.analyze_tensor(part)
        dependent_sources.update(analysis.dependent_sources)

    def _is_dependency(input_tensor):
        return any(
            tf_utils.hashable_tensor_or_op(part) in dependent_sources
            for part in _decompose_tensor_or_op(input_tensor))

    return {
        name: input_tensor
        for name, input_tensor in six.iteritems(self._input_signature)
        if _is_dependency(input_tensor)
    }
Example #10
Source File: From transform with Apache License 2.0 | 5 votes |
def _make_source_infos_dict(self, input_signature, replaced_tensors_ready):
    """Builds a dictionary from source tensors to _SourceInfos.

    Each tensor in `replaced_tensors_ready` is a source whose readiness is
    known and which has no name. Each tensor (or component of a composite
    tensor) in `input_signature` is recorded as ready, with a name derived
    from its signature key.

    Args:
        input_signature: A dict whose keys are strings and values are
            `Tensor`s, `SparseTensor`s, or `RaggedTensor`s.
        replaced_tensors_ready: a dict from `Tensor`, `SparseTensor`s, or
            `RaggedTensor`s to bool indicating whether the tensor is ready in
            this phase.

    Returns:
        a dictionary from source tensors to _SourceInfos.

    Raises:
        TypeError: If a value of `input_signature` is neither a `tf.Tensor`
            nor a `CompositeTensor`.
    """
    source_infos = {}

    # Replaced tensors: unnamed sources carrying the caller-provided readiness.
    for replaced, ready in six.iteritems(replaced_tensors_ready):
        dereferenced = tf_utils.deref_tensor_or_op(replaced)
        for part in _decompose_tensor_or_op(dereferenced):
            source_infos[tf_utils.hashable_tensor_or_op(part)] = _SourceInfo(
                ready, None)

    # Signature inputs: always ready, named after the signature key.
    for name, tensor in six.iteritems(input_signature):
        if isinstance(tensor, tf.Tensor):
            _set_unique_value_in_dict(
                source_infos, tensor,
                _SourceInfo(True, '{}$tensor'.format(name)))
            continue
        if not isinstance(tensor, composite_tensor.CompositeTensor):
            raise TypeError(
                'Expected Tensor, or CompositeTensor, got {} of type {}'.format(
                    tensor, type(tensor)))
        for index, part in enumerate(_decompose_tensor_or_op(tensor)):
            _set_unique_value_in_dict(
                source_infos, part,
                _SourceInfo(True, '{}$composite_tensor_{}'.format(name, index)))

    return source_infos
Example #11
Source File: From transform with Apache License 2.0 | 5 votes |
def ready_to_run(self, tensor_or_op):
    """Determine if a given tensor or op is ready to run.

    The argument is ready when every component produced by
    `_decompose_tensor_or_op` is reported as `is_ready_to_run` by
    `self.analyze_tensor`.

    Args:
        tensor_or_op: A `Tensor`, `SparseTensor`, `RaggedTensor` or `Operation`

    Returns:
        A bool indicating whether the tensor is ready to run.

    Raises:
        TypeError: If `tensor_or_op` is not one of the supported types.
        ValueError: Per the original documentation, if a placeholder or table
            is encountered during analysis (`_UnexpectedTableError` /
            `_UnexpectedPlaceholderError`). These would originate in
            `analyze_tensor`, which is not shown here.
    """
    supported_types = (tf.Tensor, tf.SparseTensor, tf.RaggedTensor, tf.Operation)
    if not isinstance(tensor_or_op, supported_types):
        raise TypeError(
            'Expected Tensor, SparseTensor, RaggedTensor, or Operation got {} of type {}'
            .format(tensor_or_op, type(tensor_or_op)))

    # Stop at the first component that is not ready.
    for part in _decompose_tensor_or_op(tensor_or_op):
        if not self.analyze_tensor(part).is_ready_to_run:
            return False
    return True
Example #12
Source File: From transform with Apache License 2.0 | 5 votes |
def test_ragged_roundtrip(self):
    """Round-trips a ragged input through a TF1-exported transform loaded in TF2.

    A graph that halves a ragged float placeholder is exported from a v1
    session, then applied through `SavedModelLoader` to a ragged input that
    was first multiplied by 10. The test is skipped when `TensorInfo` has no
    `CompositeTensor` attribute.
    """
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
        self.skipTest('This version of TensorFlow does not support '
                      'CompositeTenors in TensorInfo.')

    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Export the transform from a v1 graph/session.
    with tf.compat.v1.Graph().as_default():
        with tf.compat.v1.Session().as_default() as session:
            ragged_placeholder = tf.compat.v1.ragged.placeholder(
                tf.float32, ragged_rank=1, value_shape=[])
            halved = ragged_placeholder / 2.0
            saved_transform_io.write_saved_transform_from_session(
                session, {'input': ragged_placeholder}, {'output': halved},
                export_path)

    splits = np.array([0, 2, 3], dtype=np.int64)
    values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
    ragged_input = tf.RaggedTensor.from_row_splits(values, splits)

    # Using a computed input gives confidence that the graphs are fused
    loader_inputs = {'input': ragged_input * 10}
    loader = saved_transform_io_v2.SavedModelLoader(export_path)
    loader_outputs = loader.apply_v1_transform_model_in_v2(loader_inputs)
    result = loader_outputs['output']
    self.assertIsInstance(result, tf.RaggedTensor)

    # indices and shape unchanged; values multiplied by 10 and divided by 2
    self.assertAllEqual(splits, result.row_splits)
    self.assertEqual([5.0, 10.0, 20.0], result.values.numpy().tolist())
Example #13
Source File: From OpenNMT-tf with MIT License | 5 votes |
def add_sequence_controls(ids, length, start_id=None, end_id=None):
    """Adds sequence control tokens.

    Args:
        ids: Sequence of ids as 1D or 2D (batch) tensor.
        length: Sequence length as 0D or 1D (batch) tensor. The argument is
            never modified in place; an updated value is returned.
        start_id: Id to prepend to the sequence (set ``None`` to disable).
        end_id: Id to append to the sequence (set ``None`` to disable).

    Returns:
        A tuple ``(ids, length)``.

    Raises:
        ValueError: if the static rank of :obj:`ids` is not 1 or 2 (including
            an unknown rank).
    """
    rank = ids.shape.rank
    if rank not in (1, 2):
        # "%s" rather than "%d": an unknown rank is None, and "%d" would raise
        # TypeError while building the message instead of this ValueError.
        raise ValueError("Unsupported rank %s (expected 1 or 2)" % rank)
    batch_size = tf.shape(ids)[0] if rank == 2 else None

    def _make_column(value):
        # Builds a [1] tensor (or [batch_size, 1] for batched input) filled
        # with `value`, ready to be concatenated along the last axis.
        value = tf.constant(value, dtype=ids.dtype)
        if batch_size is not None:
            value = tf.fill([batch_size], value)
        return tf.expand_dims(value, -1)

    if start_id is not None:
        # Use the id requested by the caller, not a hard-coded constant.
        start_ids = _make_column(start_id)
        ids = tf.concat([start_ids, ids], axis=-1)
        # Rebind instead of "+=" so a caller-owned mutable array is untouched.
        length = length + 1

    if end_id is not None:
        end_ids = _make_column(end_id)
        if batch_size is not None:
            # Run concat on RaggedTensor to handle sequences with variable length.
            ids = tf.RaggedTensor.from_tensor(ids, lengths=length)
        ids = tf.concat([ids, end_ids], axis=-1)
        if batch_size is not None:
            ids = ids.to_tensor()
        length = length + 1

    return ids, length
Example #14
Source File: From transform with Apache License 2.0 | 5 votes |
def word_count(tokens, name=None):
    """Find the token count of each document/row.

    `tokens` is either a `RaggedTensor` or `SparseTensor`, representing
    tokenized strings. Only the size of each row is computed, so the dtype is
    not constrained to string.

    Args:
        tokens: either
            (1) a two-dimensional `SparseTensor`, or
            (2) a `RaggedTensor` with ragged rank of 1, non-ragged rank of 1
            of dtype `tf.string` containing tokens to be counted
        name: (Optional) A name for this operation.

    Returns:
        A one-dimensional `Tensor` the token counts of each row.

    Raises:
        ValueError: if tokens is neither sparse nor ragged
    """
    with tf.compat.v1.name_scope(name, 'word_count'):
        if isinstance(tokens, tf.RaggedTensor):
            return tokens.row_lengths()
        if not isinstance(tokens, tf.SparseTensor):
            raise ValueError('Invalid token tensor')

        # Replace every stored value with 1 and sum along axis 1.
        ones = tf.ones_like(tokens.values, dtype=tf.int64)
        unit_tokens = tf.SparseTensor(
            indices=tokens.indices,
            values=ones,
            dense_shape=tokens.dense_shape)
        counts = tf.sparse.reduce_sum(unit_tokens, axis=1)
        # Set the static shape of the result from the input's first dimension.
        counts.set_shape([tokens.shape[0]])
        return counts
Example #15
Source File: From OpenNMT-tf with MIT License | 5 votes |
def _testTokenizerOnBatchTensor(self, tokenizer, text, ref_tokens):
    """Asserts that tokenizing a batch tensor yields the expected RaggedTensor.

    Args:
        tokenizer: Object exposing ``tokenize``.
        text: Batch of strings, converted with ``tf.constant``.
        ref_tokens: Expected tokens; converted to bytes before comparison.
    """
    batch = tf.constant(text)
    ragged_tokens = tokenizer.tokenize(batch)
    self.assertIsInstance(ragged_tokens, tf.RaggedTensor)
    actual = ragged_tokens.to_list()
    expected = tf.nest.map_structure(tf.compat.as_bytes, ref_tokens)
    self.assertAllEqual(actual, expected)
Example #16
Source File: From OpenNMT-tf with MIT License | 5 votes |
def tokens_to_words(tokens, subword_token="■", is_spacer=None):
    """Converts a sequence of tokens to a sequence of words.

    Example:

        >>> tokens_to_words(["He@@", "llo", "W@@", "orld", "@@!"], subword_token="@@")
        <tf.RaggedTensor [[b'He@@', b'llo'], [b'W@@', b'orld', b'@@!']]>

    Args:
        tokens: A 1D string ``tf.Tensor``.
        subword_token: The special token used by the subword tokenizer. It is
            interpolated into regular expressions without escaping.
        is_spacer: Whether :obj:`subword_token` is used as a spacer (as in
            SentencePiece) or a joiner (as in BPE). If ``None``, will infer
            directly from :obj:`subword_token`.

    Returns:
        The words as a 2D string ``tf.RaggedTensor``.
    """
    # NOTE(review): the default marker above may be a Unicode-normalised form
    # of the tokenizer's real joiner character -- confirm against upstream.
    if is_spacer is None:
        is_spacer = subword_token == "▁"

    if is_spacer:
        # A word starts at a token that begins with the spacer, at the first
        # token (implicit spacer), or right after a token ending with the spacer.
        starts_with_spacer = tf.strings.regex_full_match(
            tokens, "%s.*" % subword_token)
        is_first_token = tf.one_hot(
            0, tf.shape(tokens)[0], on_value=True, off_value=False)
        opens_word = tf.logical_or(starts_with_spacer, is_first_token)
        ends_with_spacer = tf.strings.regex_full_match(
            tokens, ".+%s" % subword_token)
        follows_spacer = tf.roll(ends_with_spacer, shift=1, axis=0)
        word_start = tf.logical_or(follows_spacer, opens_word)
    else:
        # A token continues the current word when the previous token ends with
        # the joiner or when it begins with the joiner itself.
        ends_with_joiner = tf.strings.regex_full_match(
            tokens, ".*%s" % subword_token)
        starts_with_joiner = tf.strings.regex_full_match(
            tokens, "%s.*" % subword_token)
        follows_joiner = tf.roll(ends_with_joiner, shift=1, axis=0)
        continues_word = tf.logical_or(follows_joiner, starts_with_joiner)
        word_start = tf.logical_not(continues_word)

    row_starts = tf.squeeze(tf.where(word_start), -1)
    return tf.RaggedTensor.from_row_starts(tokens, row_starts)
Example #17
Source File: From OpenNMT-tf with MIT License | 5 votes |
def tokens_to_chars(tokens):
    """Splits tokens into unicode characters.

    Example:

        >>> tokens_to_chars(["hello", "world"])
        <tf.RaggedTensor [[b'h', b'e', b'l', b'l', b'o'], [b'w', b'o', b'r', b'l', b'd']]>

    Args:
        tokens: A string ``tf.Tensor`` of shape :math:`[T]`.

    Returns:
        The characters as a 2D string ``tf.RaggedTensor``.
    """
    characters = tf.strings.unicode_split(tokens, "UTF-8")
    return characters
Example #18
Source File: From OpenNMT-tf with MIT License | 5 votes |
def _detokenize_batch_tensor(self, tokens, sequence_length):
    """Detokenizes a dense batch by converting it to a ``tf.RaggedTensor`` first.

    Args:
        tokens: Dense batch of tokens.
        sequence_length: Length of each row, passed as ``lengths`` to
            ``tf.RaggedTensor.from_tensor``.

    Returns:
        The result of ``self._detokenize_ragged_tensor`` on the ragged batch.
    """
    ragged_tokens = tf.RaggedTensor.from_tensor(tokens, lengths=sequence_length)
    detokenized = self._detokenize_ragged_tensor(ragged_tokens)
    return detokenized
Example #19
Source File: From OpenNMT-tf with MIT License | 5 votes |
def _detokenize_ragged_tensor(self, tokens):
    """Detokenizes a batch of tokens as a ``tf.RaggedTensor``

    When not overridden, this default implementation calls
    ``_detokenize_batch_tensor`` on the dense representation together with the
    row lengths.

    Args:
        tokens: A 2-D ``tf.RaggedTensor``.

    Returns:
        A 1-D string ``tf.Tensor``.
    """
    dense_tokens = tokens.to_tensor()
    lengths = tokens.row_lengths()
    return self._detokenize_batch_tensor(dense_tokens, lengths)
Example #20
Source File: From OpenNMT-tf with MIT License | 5 votes |
def make_features(self, element=None, features=None, training=None):
    """Tokenizes raw text.

    Args:
        element: Raw text to tokenize; replaced by ``features["text"]`` when
            that key is present (the key is popped).
        features: Optional dict to fill in place. Returned unchanged when it
            already contains ``"tokens"``.
        training: When truthy and ``self.noiser`` is set, noise is applied.

    Returns:
        The features dict with ``"tokens"`` and ``"length"`` set, plus
        ``"noisy_*"`` entries when noise is applied out of place.
    """
    features = {} if features is None else features
    if "tokens" in features:
        return features

    raw_text = features.pop("text") if "text" in features else element
    tokenized = self.tokenizer.tokenize(raw_text)
    if isinstance(tokenized, tf.RaggedTensor):
        length = tokenized.row_lengths()
        tokens = tokenized.to_tensor()
    else:
        length = tf.shape(tokenized)[0]
        tokens = tokenized

    if training and self.noiser is not None:
        noisy_tokens, noisy_length = self.noiser(tokens, keep_shape=False)
        if self.in_place_noise:
            # Pick between the noisy and the clean pair with probability
            # `noise_probability`.
            use_noise = tf.random.uniform([]) < self.noise_probability
            tokens, length = tf.cond(
                use_noise,
                true_fn=lambda: (noisy_tokens, noisy_length),
                false_fn=lambda: (tokens, length))
        else:
            # Call make_features again to fill the remaining noisy features.
            noisy_features = self.make_features(
                features={"tokens": noisy_tokens, "length": noisy_length},
                training=training)
            features.update(
                ("noisy_%s" % key, value)
                for key, value in noisy_features.items())

    features["length"] = length
    features["tokens"] = tokens
    return features
Example #21
Source File: From transform with Apache License 2.0 | 4 votes |
def _feature_spec_from_batched_tensors(tensors): """Infer a feature spec from a dict of tensors. Args: tensors: A dict whose keys are strings and values are `Tensor` or `SparseTensor`s. Returns: A feature spec inferred from the types and shapes of the tensors. Raises: ValueError: If the feature spec cannot be inferred. TypeError: If any of the values of `tensors` are not a `Tensor` or `SparseTensor`. """ feature_spec = {} for name, tensor in six.iteritems(tensors): tensor = tensors[name] if tensor.dtype not in (tf.string, tf.int64, tf.float32): raise ValueError('Feature {} ({}) had invalid dtype {} for feature spec' .format(name, tensor, tensor.dtype)) if isinstance(tensor, tf.SparseTensor): shape = tensor.get_shape() if shape.ndims != 2: raise ValueError( 'Feature {} ({}) had invalid shape {} for VarLenFeature: must have ' 'rank 2'.format(name, tensor, shape)) feature_spec[name] = elif isinstance(tensor, tf.Tensor): shape = tensor.get_shape() if shape.ndims in [None, 0]: raise ValueError( 'Feature {} ({}) had invalid shape {} for FixedLenFeature: must ' 'have rank at least 1'.format(name, tensor, shape)) if any(dim is None for dim in shape.as_list()[1:]): raise ValueError( 'Feature {} ({}) had invalid shape {} for FixedLenFeature: apart ' 'from the batch dimension, all dimensions must have known size' .format(name, tensor, shape)) feature_spec[name] =[1:], tensor.dtype) elif isinstance(tensor, tf.RaggedTensor): tf.compat.v1.logging.warn( 'Feature %s was a RaggedTensor. A Schema will be generated but the ' 'Schema cannot be used with a coder (e.g. to materialize output ' 'data) or to generated a feature spec.', name) # Arbitrarily select VarLenFeature. feature_spec[name] = else: raise TypeError( 'Expected a Tensor or SparseTensor, got {} of type {} for feature {}' .format(tensor, type(tensor), name)) return feature_spec
Example #22
Source File: From neural-structured-learning with Apache License 2.0 | 4 votes |
def make_missing_neighbor_inputs(neighbor_config, inputs, weight_dtype=tf.float32):
    """Makes additional inputs for neighbor features if necessary.

    Args:
        neighbor_config: An instance of `configs.GraphNeighborConfig`
            specifying the number of neighbors and how neighbor features
            should be named.
        inputs: Dictionary of input tensors that may be missing neighbor
            features. The keys are the features names. See
            `utils.unpack_neighbor_features` for expected names of neighbor
            features and weights.
        weight_dtype: `tf.Dtype` for neighbors weights. Defaults to
            `tf.float32`.

    Returns:
        A dictionary of neighbor feature and weight tensors that do not already
        exist in `inputs`. The keys are specified according to
        `neighbor_config`.
    """
    known_names = set(inputs.keys())
    missing_inputs = {}

    for index in range(neighbor_config.max_neighbors):
        # Weight of this neighbor.
        weight_name = '{}{}{}'.format(neighbor_config.prefix, index,
                                      neighbor_config.weight_suffix)
        if weight_name not in known_names:
            missing_inputs[weight_name] = tf.keras.Input(
                (1,), dtype=weight_dtype, name=weight_name)

        # Replicate every non-neighbor input that has no counterpart yet.
        for feature_name, tensor in inputs.items():
            if feature_name.startswith(neighbor_config.prefix):
                continue
            replica_name = '{}{}_{}'.format(neighbor_config.prefix, index,
                                            feature_name)
            if replica_name in known_names:
                continue
            # The replica copies the source tensor's shape, dtype and kind.
            missing_inputs[replica_name] = tf.keras.Input(
                tensor.shape[1:],
                batch_size=tensor.shape[0],
                dtype=tensor.dtype,
                name=replica_name,
                ragged=isinstance(tensor, tf.RaggedTensor),
                sparse=isinstance(tensor, tf.sparse.SparseTensor))

    return missing_inputs
Example #23
Source File: From tfx-bsl with Apache License 2.0 | 4 votes |
def _MakeRaggedTensorDTypesTestCases():
    """Builds test-case dicts covering ragged tensors of every supported dtype.

    One case is produced per (value type, list type) pair, where the list type
    is either `pa.list_` or `pa.large_list`. The expected output is a
    `tf.RaggedTensor` when executing eagerly and a `RaggedTensorValue`
    otherwise.

    Returns:
        A list of dicts with keys "testcase_name",
        "tensor_representation_textpb", "record_batch",
        "expected_ragged_tensor" and "expected_type_spec".
    """
    test_cases = []
    tensor_representation_textpb = """ ragged_tensor { feature_path { step: "ragged_feature" } } """
    list_type_factories = (("list", pa.list_), ("large_list", pa.large_list))

    for value_type in _ALL_SUPPORTED_VALUE_TYPES:
        for list_type_name, make_list_type in list_type_factories:
            type_spec = tf.RaggedTensorSpec(
                [None, None],
                _ARROW_TYPE_TO_TF_TYPE[value_type],
                ragged_rank=1,
                row_splits_dtype=tf.int64)

            if pa.types.is_integer(value_type):
                values = [[1, 2], None, [], [3]]
                flat_values = [1, 2, 3]
            elif pa.types.is_floating(value_type):
                values = [[1.0, 2.0], None, [], [3.0]]
                flat_values = [1.0, 2.0, 3.0]
            else:
                values = [[b"a", b"b"], None, [], [b"c"]]
                flat_values = [b"a", b"b", b"c"]

            # The None row and the [] row both show up as empty rows here.
            row_splits = np.asarray([0, 2, 2, 2, 3], dtype=np.int64)

            if tf.executing_eagerly():
                expected = tf.RaggedTensor.from_row_splits(
                    values=tf.constant(
                        flat_values, dtype=_ARROW_TYPE_TO_TF_TYPE[value_type]),
                    row_splits=row_splits)
            else:
                expected = tf.compat.v1.ragged.RaggedTensorValue(
                    values=np.array(flat_values,
                                    _ARROW_TYPE_TO_NP_TYPE[value_type]),
                    row_splits=row_splits)

            arrow_column = pa.array(values, type=make_list_type(value_type))
            test_cases.append({
                "testcase_name": "1D_{}_{}".format(value_type, list_type_name),
                "tensor_representation_textpb": tensor_representation_textpb,
                "record_batch": pa.RecordBatch.from_arrays(
                    [arrow_column], ["ragged_feature"]),
                "expected_ragged_tensor": expected,
                "expected_type_spec": type_spec,
            })

    return test_cases