Python tensorflow.regex_replace() Examples
The following are 3 code examples of tensorflow.regex_replace().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.

Example #1
Source File: input_fn.py From professional-services with Apache License 2.0 | 6 votes |
def parse_raw_text(sentence):
    """Tokenize a text tensor into a sparse sequence of words.

    Every match of `_CHAR_TO_FILTER_OUT` in the input is replaced by a single
    space, and the cleaned text is then split with `tf.string_split`.

    Args:
        sentence: `tf.string`, with text record to split.

    Returns:
        Dictionary with two entries: `constants.TOKENS` mapping to the
        `SparseTensor` of tokens, and `constants.SEQUENCE_LENGTH` mapping to
        a one-dimensional integer `Tensor` computed by
        `get_sparse_tensor_size`.
    """
    # Blank out unwanted characters first so they act as token separators.
    filtered_text = tf.regex_replace(
        sentence, _CHAR_TO_FILTER_OUT, ' ', replace_global=True)
    token_sequence = tf.string_split(filtered_text)
    return {
        constants.TOKENS: token_sequence,
        constants.SEQUENCE_LENGTH: get_sparse_tensor_size(token_sequence),
    }
Example #2
Source File: logistic_regression.py From tf-encrypted with Apache License 2.0 | 5 votes |
def provide_data(self):
    """Build the text-line input pipeline and return one batch tensor.

    Lines are read from `self.local_data_file`, optionally skipping a header
    line, split on `self.field_delim`, converted to float32, and NaN entries
    are replaced by `self.data_schema.field_defaults`. The dataset repeats
    indefinitely and is batched by `self.batch_size`.

    Returns:
        A tensor reshaped to
        `[self.batch_size, self.data_schema.field_num]`.
    """

    def decode(line):
        """Split one text line into a float32 vector of field values."""
        raw_fields = tf.string_split([line], self.field_delim).values
        if self.index:
            # Skip index
            raw_fields = raw_fields[1:]
        # NOTE(review): na_values are joined un-escaped into one alternation
        # pattern, so regex metacharacters in them are interpreted as regex.
        na_pattern = "|".join(self.na_values)
        with_nan = tf.regex_replace(raw_fields, na_pattern, "nan")
        return tf.string_to_number(with_nan, tf.float32)

    def fill_na(fields, fill_values):
        """Substitute `fill_values` wherever `fields` is NaN."""
        return tf.where(tf.is_nan(fields), fill_values, fields)

    lines = tf.data.TextLineDataset(self.local_data_file)
    if self.header:
        # Skip header
        lines = lines.skip(1)

    parsed = lines.map(decode)
    filled = parsed.map(
        lambda row: fill_na(row, self.data_schema.field_defaults))
    batched = filled.repeat().batch(self.batch_size)

    next_batch = batched.make_one_shot_iterator().get_next()
    return tf.reshape(
        next_batch, [self.batch_size, self.data_schema.field_num])
Example #3
Source File: metrics.py From BERT with Apache License 2.0 | 4 votes |
def word_error_rate(raw_predictions,
                    labels,
                    lookup=None,
                    weights_fn=common_layers.weights_nonzero):
    """Compute the word error rate of predictions against labels.

    Predictions and labels are each converted to sparse string tokens, the
    un-normalized edit distances between them are summed, and the sum is
    divided by the number of reference tokens.

    Args:
        raw_predictions: The raw predictions. The argmax over the last axis
            is taken and axes 2 and 3 are squeezed away.
        labels: The actual labels. Axes 2 and 3 are squeezed away.
        lookup: A tf.constant mapping indices to output tokens. When None, a
            256-entry table of `chr(i)` is built and the ids are decoded as
            characters instead.
        weights_fn: Weighting function. Only
            `common_layers.weights_nonzero` is accepted.

    Returns:
        A pair `(word_error_rate, reference_length)`, where
        `reference_length` is the float32 count of reference tokens.

    Raises:
        ValueError: If `weights_fn` is not `common_layers.weights_nonzero`.
    """

    def from_tokens(raw, lookup_):
        """Map token ids to strings and split them into word tokens."""
        token_strings = tf.gather(lookup_, tf.cast(raw, tf.int32))
        sentence = tf.reduce_join(token_strings, axis=1)
        # Drop everything from the first end-of-sequence marker onwards.
        truncated = tf.regex_replace(sentence, b"<EOS>.*", b"")
        # Underscores stand in for spaces in the joined string.
        spaced = tf.regex_replace(truncated, b"_", b" ")
        return tf.string_split(spaced, " ")

    def from_characters(raw, lookup_):
        """Decode ascii+2 encoded codes into string tokens."""
        shifted = tf.subtract(raw, 2)
        clipped = tf.clip_by_value(shifted, 0, 255)
        as_bytes = tf.bitcast(clipped, tf.uint8)
        # NOTE(review): the uint8 bitcast presumably adds a trailing per-byte
        # axis, of which index 0 is kept -- confirm against tf.bitcast docs.
        chars = tf.gather(lookup_, tf.cast(as_bytes, tf.int32))[:, :, 0]
        text = tf.reduce_join(chars, axis=1)
        without_nul = tf.regex_replace(text, b"\0", b"")
        return tf.string_split(without_nul, " ")

    use_characters = lookup is None
    if use_characters:
        lookup = tf.constant(list(map(chr, range(256))))
    convert_fn = from_characters if use_characters else from_tokens

    if weights_fn is not common_layers.weights_nonzero:
        raise ValueError("Only weights_nonzero can be used for this metric.")

    with tf.variable_scope("word_error_rate", values=[raw_predictions, labels]):
        predicted_ids = tf.argmax(raw_predictions, axis=-1)
        predicted_ids = tf.squeeze(predicted_ids, axis=(2, 3))
        label_ids = tf.squeeze(labels, axis=(2, 3))

        reference = convert_fn(label_ids, lookup)
        predictions = convert_fn(predicted_ids, lookup)

        edit_distances = tf.edit_distance(
            predictions, reference, normalize=False)
        distance = tf.reduce_sum(edit_distances)

        token_count = tf.size(reference.values, out_type=tf.int32)
        reference_length = tf.cast(token_count, dtype=tf.float32)
        return distance / reference_length, reference_length