Python tensorflow.string_split() Examples

The following are 30 code examples of tensorflow.string_split(), drawn from open-source projects. The source file and project are listed above each example.
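For orientation before the examples: tf.string_split takes a 1-D tensor of strings and returns a tf.SparseTensor whose values are the individual tokens and whose indices record (row, token position). Below is a minimal sketch under the TF 1.x session API that all of these examples use; in TensorFlow 2.x the equivalent is tf.strings.split (which returns a RaggedTensor), or tf.compat.v1.string_split.

import tensorflow as tf

sentences = tf.constant(["hello world", "a b c"])
tokens = tf.string_split(sentences, delimiter=" ")  # SparseTensor of tokens

with tf.Session() as sess:
    indices, values, dense_shape = sess.run(tokens)
    # values      -> [b'hello', b'world', b'a', b'b', b'c']
    # indices     -> [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
    # dense_shape -> [2, 3]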
Example #1
Source File: inference.py From fine-lm with MIT License

def load_data(input_file, input_vocab):
    """Returns an iterator over the input file.

    Args:
      input_file: The input text file.
      input_vocab: The input vocabulary.

    Returns:
      A dataset batch iterator.
    """
    dataset = tf.data.TextLineDataset(input_file)
    dataset = dataset.map(lambda x: tf.string_split([x]).values)
    dataset = dataset.map(input_vocab.lookup)
    dataset = dataset.map(lambda x: {
        "ids": x,
        "length": tf.shape(x)[0]})
    dataset = dataset.padded_batch(64, {
        "ids": [None],
        "length": []})
    return dataset.make_initializable_iterator()
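A usage note on load_data above: it returns an initializable iterator and relies on a vocabulary lookup table, so both must be initialized before batches can be pulled. A minimal, hypothetical driver sketch (the file name and the assumption that input_vocab is backed by a tf.contrib.lookup table are illustrative, not taken from the original project):

# Hypothetical driver for load_data (TF 1.x session API).
iterator = load_data("sentences.txt", input_vocab)  # placeholder path
batch = iterator.get_next()

with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # initialize the vocabulary lookup table
    sess.run(iterator.initializer)     # initialize the dataset iterator
    while True:
        try:
            ids, lengths = sess.run([batch["ids"], batch["length"]])
        except tf.errors.OutOfRangeError:
            break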
Example #2
Source File: split_tokens_decoder.py From reaction_prediction_seq2seq with Apache License 2.0

def decode(self, data, items):
    decoded_items = {}

    # Split tokens
    tokens = tf.string_split([data], delimiter=self.delimiter).values

    # Optionally prepend a special token
    if self.prepend_token is not None:
        tokens = tf.concat([[self.prepend_token], tokens], 0)

    # Optionally append a special token
    if self.append_token is not None:
        tokens = tf.concat([tokens, [self.append_token]], 0)

    decoded_items[self.length_feature_name] = tf.size(tokens)
    decoded_items[self.tokens_feature_name] = tokens
    return [decoded_items[_] for _ in items]
Example #3
Source File: input_fn.py From professional-services with Apache License 2.0

def parse_raw_text(sentence):
    """Splits text tensor by word to sparse sequence of tokens.

    Args:
      sentence: `tf.string`, with text record to split.

    Returns:
      Dictionary mapping feature name to tensors with the following entries
      `constants.TOKENS` mapping to a `SparseTensor` and
      `constants.SEQUENCE_LENGTH` mapping to a one-dimensional integer `Tensor`.
    """
    tokens = tf.regex_replace(sentence, _CHAR_TO_FILTER_OUT, ' ',
                              replace_global=True)
    sparse_sequence = tf.string_split(tokens)
    features = {
        constants.TOKENS: sparse_sequence,
        constants.SEQUENCE_LENGTH: get_sparse_tensor_size(sparse_sequence)
    }
    return features
Example #4
Source File: content.py From ConMask with MIT License

def multiple_content_lookup(content, vocab_table, ids, name=None):
    """
    :param content:
    :param vocab_table:
    :param ids:
    :param name:
    :return: 2-D [batch_size, max_length_in_batch] content id matrix,
             1-D [batch_size] content len vector
    """
    with tf.name_scope(name, 'multiple_content_lookup', [content, vocab_table, ids]):
        content_list = tf.nn.embedding_lookup(content, ids)

        extracted_sparse_content = tf.string_split(content_list, delimiter=' ')

        sparse_content = tf.SparseTensor(indices=extracted_sparse_content.indices,
                                         values=vocab_table.lookup(extracted_sparse_content.values),
                                         dense_shape=extracted_sparse_content.dense_shape)

        extracted_content_ids = tf.sparse_tensor_to_dense(sparse_content,
                                                          default_value=0,
                                                          name='dense_content')
        extracted_content_len = tf.reduce_sum(
            tf.cast(tf.not_equal(extracted_content_ids, 0), tf.int32), axis=-1)

        return extracted_content_ids, extracted_content_len
Example #5
Source File: content.py From ConMask with MIT License

def entity_content_embedding_lookup(entities, content, content_len, vocab_table,
                                    word_embedding, str_pad, name=None):
    """Lookup entity word embeddings given a flatten 1-D entity id list and content lookup table

    :param entities: Must be a 1-D entity vector
    :param content:
    :param content_len:
    :param vocab_table:
    :param word_embedding:
    :param str_pad:
    :param name:
    :return:
    """
    with tf.device('/cpu:0'):
        with tf.name_scope(name, 'entity_content_lookup',
                           [entities, content, content_len, vocab_table, word_embedding]):
            ent_content = tf.string_split(
                tf.nn.embedding_lookup(content, entities, name='ent_content'),
                delimiter=' ')
            content_len = tf.nn.embedding_lookup(content_len, entities, name='ent_content_len')
            ent_content_dense = tf.sparse_tensor_to_dense(ent_content,
                                                          default_value=str_pad,
                                                          name='ent_content_dense')
            ent_embedding = tf.nn.embedding_lookup(
                word_embedding,
                vocab_table.lookup(ent_content_dense, name='ent_content_ids'))

            return ent_embedding, content_len
Example #6
Source File: corruption.py From ConMask with MIT License

def get_true_tails(ent_rel_str, targets_lookup_table, targets, name=None):
    """Given ent \t rel pair return a list of string targets

    :param ent_rel_str:
    :param targets_lookup_table:
    :param name:
    :return:
    """
    with tf.name_scope(name, 'get_true_tails', [ent_rel_str, targets_lookup_table, targets]):
        target_entities_lookup_id = targets_lookup_table.lookup(ent_rel_str)

        # CHECK IF WE HAVE -1 HERE, if so the error will be have a -2 that is out of the range
        target_entities_lookup_id = tf.where(tf.equal(target_entities_lookup_id, -1),
                                             target_entities_lookup_id - 1,
                                             target_entities_lookup_id)

        # sparseTensor
        str_targets = tf.string_split(
            tf.nn.embedding_lookup(targets, target_entities_lookup_id), delimiter=' ')

        return str_targets.values
Example #7
Source File: split_tokens_decoder.py From natural-language-summary-generation-from-structured-data with MIT License

def decode(self, data, items):
    decoded_items = {}

    # Split tokens
    tokens = tf.string_split([data], delimiter=self.delimiter).values

    # Optionally prepend a special token
    if self.prepend_token is not None:
        tokens = tf.concat([[self.prepend_token], tokens], 0)

    # Optionally append a special token
    if self.append_token is not None:
        tokens = tf.concat([tokens, [self.append_token]], 0)

    decoded_items[self.length_feature_name] = tf.size(tokens)
    decoded_items[self.tokens_feature_name] = tokens
    return [decoded_items[_] for _ in items]
Example #8
Source File: data_util.py From reading_comprehension_tf with Apache License 2.0

def generate_word_feat(sentence, word_vocab_index, word_max_length,
                       word_pad, word_sos, word_eos, word_placeholder_enable):
    """generate word feature for sentence"""
    words = tf.string_split([sentence], delimiter=' ').values
    if word_placeholder_enable == True:
        words = tf.concat([[word_sos], words[:word_max_length], [word_eos],
                           tf.constant(word_pad, shape=[word_max_length])], axis=0)
        word_max_length = word_max_length + 2
    else:
        words = tf.concat([words[:word_max_length],
                           tf.constant(word_pad, shape=[word_max_length])], axis=0)
    words = tf.reshape(words[:word_max_length], shape=[word_max_length])
    words = tf.cast(word_vocab_index.lookup(words), dtype=tf.int32)
    words = tf.expand_dims(words, axis=-1)
    return words
Example #9
Source File: data_util.py From reading_comprehension_tf with Apache License 2.0

def create_trg_dataset(input_dataset, input_data_type, word_vocab_index,
                       word_max_length, word_pad, word_sos, word_eos,
                       word_placeholder_enable, num_parallel):
    """create dataset for input target data"""
    dataset = input_dataset

    if input_data_type == "span":
        dataset = dataset.map(lambda span: tf.string_split([span], delimiter='|').values,
                              num_parallel_calls=num_parallel)
        dataset = dataset.map(lambda span: tf.string_to_number(span, out_type=tf.int32),
                              num_parallel_calls=num_parallel)
        dataset = dataset.map(lambda span: tf.expand_dims(span, axis=-1),
                              num_parallel_calls=num_parallel)
    elif input_data_type == "text":
        dataset = dataset.map(lambda sent: generate_word_feat(sent,
                                                              word_vocab_index,
                                                              word_max_length,
                                                              word_pad,
                                                              word_sos,
                                                              word_eos,
                                                              word_placeholder_enable),
                              num_parallel_calls=num_parallel)

    return dataset
Example #10
Source File: word2vec.py From tensorflow_nlp with Apache License 2.0

def read_word_freq(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    lines = tf.string_split([value], "\n")

    with tf.Session() as sess:
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        sess.run([lines])
        lines_eval = lines.eval()

        result = []
        for line in lines_eval.values:
            s = line.split()
            result.append((s[0], int(s[1])))

        coord.request_stop()
        coord.join(threads)

    return result
Example #11
Source File: utils.py From conv-ensemble-str with Apache License 2.0

def get_label(self, text, null_character=u'\u2591'):
    """ Returns the ids of the corresponding text.

    Args:
      text: a tensor with shape [batch_size, lexicon_size] and type string
      null_character: a unicode character used to replace '<null>' character.
        the default value is a light shade block '░'.
    """
    batch_size = text.shape[0].value
    lexicon_size = text.shape[1].value
    text = tf.reshape(text, [-1])
    sp_text = tf.string_split(text, delimiter='')
    sp_text = tf.sparse_reset_shape(sp_text,
                                    [batch_size * lexicon_size, self.max_sequence_length])
    sp_text = tf.sparse_tensor_to_dense(sp_text, default_value=null_character)
    ids = self.invert_table.lookup(sp_text)
    ids = tf.reshape(ids, [batch_size, lexicon_size, self.max_sequence_length])
    return tf.to_int32(ids)
Example #12
Source File: string_split_op_test.py From deep_image_model with Apache License 2.0

def testStringSplitWithDelimiterTensor(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
        delimiter = tf.placeholder(tf.string)

        tokens = tf.string_split(strings, delimiter=delimiter)

        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(tokens, feed_dict={delimiter: ["a", "b"]})
        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(tokens, feed_dict={delimiter: ["a"]})
        with self.assertRaises(tf.errors.InvalidArgumentError):
            sess.run(tokens, feed_dict={delimiter: "abc"})
        indices, values, shape = sess.run(tokens, feed_dict={delimiter: "|"})

        self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
        self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
        self.assertAllEqual(shape, [2, 2])
Example #13
Source File: string_split_op_test.py From deep_image_model with Apache License 2.0

def testStringSplitWithDelimiter(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
        self.assertRaises(
            ValueError, tf.string_split, strings, delimiter="delimiter")
        self.assertRaises(
            ValueError, tf.string_split, strings, delimiter=["|", ""])
        self.assertRaises(ValueError, tf.string_split, strings, delimiter=["a"])

        tokens = tf.string_split(strings, delimiter="|")
        indices, values, shape = sess.run(tokens)
        self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
        self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
        self.assertAllEqual(shape, [2, 2])
Example #14
Source File: dataset.py From Document-Transformer with BSD 3-Clause "New" or "Revised" License

def get_inference_input_ctx(inputs, ctxs, params):
    with tf.device("/cpu:0"):
        dataset = tf.data.Dataset.from_tensor_slices(
            tf.constant(inputs)
        )

        # Split string
        dataset = dataset.map(lambda x: tf.string_split([x]).values,
                              num_parallel_calls=params.num_threads)

        # Append <eos>
        dataset = dataset.map(
            lambda x: tf.concat([x, [tf.constant(params.eos)]], axis=0),
            num_parallel_calls=params.num_threads
        )

        # Convert tuple to dictionary
        dataset = dataset.map(
            lambda x: {"source": x, "source_length": tf.shape(x)[0]},
            num_parallel_calls=params.num_threads
        )

        dataset = dataset.padded_batch(
            params.decode_batch_size * len(params.device_list),
            {"source": [tf.Dimension(None)], "source_length": []},
            {"source": params.pad, "source_length": 0}
        )

        iterator = dataset.make_one_shot_iterator()
        features = iterator.get_next()

        src_table = tf.contrib.lookup.index_table_from_tensor(
            tf.constant(params.vocabulary["source"]),
            default_value=params.mapping["source"][params.unk]
        )
        features["source"] = src_table.lookup(features["source"])

        return features
Example #15
Source File: dataset.py From Document-Transformer with BSD 3-Clause "New" or "Revised" License

def get_inference_input(inputs, params):
    with tf.device("/cpu:0"):
        dataset = tf.data.Dataset.from_tensor_slices(
            tf.constant(inputs)
        )

        # Split string
        dataset = dataset.map(lambda x: tf.string_split([x]).values,
                              num_parallel_calls=params.num_threads)

        # Append <eos>
        dataset = dataset.map(
            lambda x: tf.concat([x, [tf.constant(params.eos)]], axis=0),
            num_parallel_calls=params.num_threads
        )

        # Convert tuple to dictionary
        dataset = dataset.map(
            lambda x: {"source": x, "source_length": tf.shape(x)[0]},
            num_parallel_calls=params.num_threads
        )

        dataset = dataset.padded_batch(
            params.decode_batch_size * len(params.device_list),
            {"source": [tf.Dimension(None)], "source_length": []},
            {"source": params.pad, "source_length": 0}
        )

        iterator = dataset.make_one_shot_iterator()
        features = iterator.get_next()

        src_table = tf.contrib.lookup.index_table_from_tensor(
            tf.constant(params.vocabulary["source"]),
            default_value=params.mapping["source"][params.unk]
        )
        features["source"] = src_table.lookup(features["source"])

        return features
Example #16
Source File: data_util.py From reading_comprehension_tf with Apache License 2.0

def generate_subword_feat(sentence, subword_vocab_index, word_max_length, subword_max_length,
                          subword_size, word_sos, word_eos, word_placeholder_enable, subword_pad):
    def word_to_subword(word):
        """generate subwords for word"""
        word_len = tf.size(tf.string_split([word], delimiter=''))
        subwords = tf.substr([word], 0, subword_size)
        for i in range(1, subword_max_length):
            subwords = tf.cond(i + subword_size - 1 < word_len,
                               lambda: tf.concat([subwords, tf.substr([word], i, subword_size)], 0),
                               lambda: subwords)

        subwords = tf.concat([subwords[:subword_max_length],
                              tf.constant(subword_pad, shape=[subword_max_length])], axis=0)
        subwords = tf.reshape(subwords[:subword_max_length], shape=[subword_max_length])

        return subwords

    """generate subword feature for sentence"""
    words = tf.string_split([sentence], delimiter=' ').values
    if word_placeholder_enable == True:
        words = tf.concat([[word_sos], words[:word_max_length], [word_eos],
                           tf.constant(subword_pad, shape=[word_max_length])], axis=0)
        word_max_length = word_max_length + 2
    else:
        words = tf.concat([words[:word_max_length],
                           tf.constant(subword_pad, shape=[word_max_length])], axis=0)

    words = tf.reshape(words[:word_max_length], shape=[word_max_length])
    word_subwords = tf.map_fn(word_to_subword, words)
    word_subwords = tf.cast(subword_vocab_index.lookup(word_subwords), dtype=tf.int32)

    return word_subwords
Example #17
Source File: dataset_utils.py From TwinGAN with Apache License 2.0

def tensors_to_item(self, keys_to_tensors):
    unmapped_tensor = super(OneHotLabelTensor, self).tensors_to_item(keys_to_tensors)
    labels_text_split = tf.string_split([unmapped_tensor], delimiter=self._delimiter)
    tensor = self._table.lookup(labels_text_split.values)
    tensor = util_misc.safe_one_hot_encoding(tensor, self._num_classes, dtype=self._dtype)
    return tensor

#####################
# tf example parser #
#####################
# tf example parser functions. Some are taken from the tensorflow object detection repo.
Example #18
Source File: data_util.py From reading_comprehension_tf with Apache License 2.0

def generate_char_feat(sentence, char_vocab_index, word_max_length, char_max_length,
                       word_sos, word_eos, word_placeholder_enable, char_pad):
    def word_to_char(word):
        """generate chars for word"""
        chars = tf.string_split([word], delimiter='').values
        chars = tf.concat([chars[:char_max_length],
                           tf.constant(char_pad, shape=[char_max_length])], axis=0)
        chars = tf.reshape(chars[:char_max_length], shape=[char_max_length])

        return chars

    """generate char feature for sentence"""
    words = tf.string_split([sentence], delimiter=' ').values
    if word_placeholder_enable == True:
        words = tf.concat([[word_sos], words[:word_max_length], [word_eos],
                           tf.constant(char_pad, shape=[word_max_length])], axis=0)
        word_max_length = word_max_length + 2
    else:
        words = tf.concat([words[:word_max_length],
                           tf.constant(char_pad, shape=[word_max_length])], axis=0)

    words = tf.reshape(words[:word_max_length], shape=[word_max_length])
    word_chars = tf.map_fn(word_to_char, words)
    word_chars = tf.cast(char_vocab_index.lookup(word_chars), dtype=tf.int32)

    return word_chars
Example #19
Source File: tf_example_decoder.py From aster with MIT License

def _split_lexicon(self, keys_to_tensors):
    joined_lexicon = keys_to_tensors[fields.TfExampleFields.lexicon]
    lexicon_sparse = tf.string_split([joined_lexicon], delimiter='\t')
    lexicon = tf.sparse_tensor_to_dense(lexicon_sparse, default_value='')[0]
    return lexicon
Example #20
Source File: tokenizeddata.py From ChatLearner with Apache License 2.0

def get_inference_batch(self, src_dataset):
    text_dataset = src_dataset.map(lambda src: tf.string_split([src]).values)

    if self.hparams.src_max_len_infer:
        text_dataset = text_dataset.map(lambda src: src[:self.hparams.src_max_len_infer])

    # Convert the word strings to ids
    id_dataset = text_dataset.map(lambda src: tf.cast(self.vocab_table.lookup(src), tf.int32))
    if self.hparams.source_reverse:
        id_dataset = id_dataset.map(lambda src: tf.reverse(src, axis=[0]))

    # Add in the word counts.
    id_dataset = id_dataset.map(lambda src: (src, tf.size(src)))

    def batching_func(x):
        return x.padded_batch(
            self.hparams.batch_size_infer,
            # The entry is the source line rows; this has unknown-length vectors.
            # The last entry is the source row size; this is a scalar.
            padded_shapes=(tf.TensorShape([None]),  # src
                           tf.TensorShape([])),     # src_len
            # Pad the source sequences with eos tokens. Though notice we don't generally need to
            # do this since later on we will be masking out calculations past the true sequence.
            padding_values=(self.hparams.eos_id,  # src
                            0))                   # src_len -- unused

    id_dataset = batching_func(id_dataset)

    infer_iter = id_dataset.make_initializable_iterator()
    (src_ids, src_seq_len) = infer_iter.get_next()

    return BatchedInput(initializer=infer_iter.initializer,
                        source=src_ids,
                        target_input=None,
                        target_output=None,
                        source_sequence_length=src_seq_len,
                        target_sequence_length=None)
Example #21
Source File: logistic_regression.py From tf-encrypted with Apache License 2.0

def provide_data(self):
    def decode(line):
        fields = tf.string_split([line], self.field_delim).values
        if self.index:  # Skip index
            fields = fields[1:]
        fields = tf.regex_replace(fields, "|".join(self.na_values), "nan")
        fields = tf.string_to_number(fields, tf.float32)
        return fields

    def fill_na(fields, fill_values):
        fields = tf.where(tf.is_nan(fields), fill_values, fields)
        return fields

    dataset = tf.data.TextLineDataset(self.local_data_file)
    if self.header:  # Skip header
        dataset = dataset.skip(1)
    dataset = (
        dataset.map(decode)
        .map(lambda x: fill_na(x, self.data_schema.field_defaults))
        .repeat()
        .batch(self.batch_size)
    )
    iterator = dataset.make_one_shot_iterator()
    batch = iterator.get_next()
    batch = tf.reshape(batch, [self.batch_size, self.data_schema.field_num])
    return batch
Example #22
Source File: aby3.py From tf-encrypted with Apache License 2.0

def _read_(prot, filename_prefix, batch_size, n_columns):
    row_shape = [n_columns]

    def decode(line):
        fields = tf.string_split([line], ",").values
        fields = tf.strings.to_number(fields, tf.int64)
        fields = tf.reshape(fields, row_shape)
        return fields

    batch = [[None] * 2 for _ in range(3)]
    for i in range(3):
        with tf.device(prot.servers[i].device_name):
            for j in range(2):
                data = (
                    tf.data.TFRecordDataset(
                        ["{}_share{}{}".format(filename_prefix, i, j)]
                    )
                    .map(decode)
                    .repeat()
                    .batch(batch_size=batch_size)
                )
                it = data.make_one_shot_iterator()
                batch[i][j] = it.get_next()
                batch[i][j] = tf.reshape(batch[i][j], [batch_size] + row_shape)
                batch[i][j] = prot.int_factory.tensor(batch[i][j])

    return ABY3PrivateTensor(prot, batch, True, ARITHMETIC)
Example #23
Source File: data.py From tf_examples with Apache License 2.0

def make_input_fn(mode, filename_in, filename_out, in_vocab_file, out_vocab_file,
                  batch_size, vocab_size, input_max_length, output_max_length,
                  queue_capacity=10000, num_threads=10):
    def input_fn():
        num_epochs = None if mode == tf.estimator.ModeKeys.TRAIN else 1
        filename_in_queue = tf.train.string_input_producer(
            [filename_in], num_epochs=num_epochs)
        filename_out_queue = tf.train.string_input_producer(
            [filename_out], num_epochs=num_epochs)
        reader_in = tf.TextLineReader()
        reader_out = tf.TextLineReader()
        in_list, out_list = [], []
        for _ in range(num_threads):
            in_list.append(reader_in.read(filename_in_queue)[1])
            out_list.append(reader_out.read(filename_out_queue)[1])
        tensor_in = reader_in.read(filename_in_queue)[1]
        tensor_out = reader_out.read(filename_out_queue)[1]
        if mode == tf.estimator.ModeKeys.TRAIN:
            inputs, outputs = tf.train.shuffle_batch(
                (tensor_in, tensor_out), batch_size, capacity=queue_capacity,
                min_after_dequeue=batch_size * 3, enqueue_many=True)
        else:
            inputs, outputs = tf.train.batch(
                (tensor_in, tensor_out), batch_size, capacity=queue_capacity,
                allow_smaller_final_batch=True)

        # Preprocess inputs.
        inputs = utils.sparse_to_dense_trim(
            tf.string_split(inputs),
            output_shape=[batch_size, input_max_length],
            default_value='<\S>')
        outputs = utils.sparse_to_dense_trim(
            tf.string_split(outputs),
            output_shape=[batch_size, output_max_length],
            default_value='<\S>')
        tf.identity(inputs[0], name='inputs')
        tf.identity(outputs[0], name='outputs')

        in_vocab = tf.contrib.lookup.index_table_from_file(
            in_vocab_file, vocab_size=vocab_size, default_value=2)
        input_ids = in_vocab.lookup(inputs)
        out_vocab = tf.contrib.lookup.index_table_from_file(
            out_vocab_file, vocab_size=vocab_size, default_value=2)
        output_ids = out_vocab.lookup(outputs)
        return {'inputs': input_ids, 'outputs': output_ids}, None
    return input_fn
Example #24
Source File: main.py From NAO with GNU General Public License v3.0

def predict_input_fn(predict_from_file):
    dataset = tf.data.TextLineDataset(predict_from_file)

    def decode_record(record):
        src = tf.string_split([record]).values
        src = tf.string_to_number(src, out_type=tf.int32)
        return src, tf.constant([SOS], dtype=tf.int32)

    dataset = dataset.map(decode_record)
    dataset = dataset.batch(FLAGS.batch_size)
    iterator = dataset.make_one_shot_iterator()
    inputs, targets_inputs = iterator.get_next()
    assert inputs.shape.ndims == 2
    return inputs, targets_inputs
Example #25
Source File: 2_adanet_avazu.py From deep-learning-note with MIT License

def generator(ln):
    splits = tf.string_split([ln], delimiter=',')
    label = splits.values[0]
    # Parse the dense features
    features = {}
    for i in range(1, 14):
        features['I' + str(i)] = tf.string_to_number(splits.values[i], tf.int64)
    return features, label
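A short sketch of how a line-parsing function like generator is typically wired into tf.data (hypothetical; the file name is a placeholder, not from the original project):

# Hypothetical usage of `generator` above (TF 1.x).
# Each CSV line is assumed to hold a label followed by 13 integer columns.
dataset = tf.data.TextLineDataset("train.csv")  # placeholder path
dataset = dataset.map(generator).batch(32)
features, labels = dataset.make_one_shot_iterator().get_next()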
Example #26
Source File: decoder_main.py From NAO with GNU General Public License v3.0

def predict_from_file(estimator, batch_size, decode_from_file, decode_to_file=None):

    def infer_input_fn():
        sos_id = tf.constant([SOS], dtype=tf.int32)
        dataset = tf.data.TextLineDataset(decode_from_file)

        def decode_record(record):
            src = tf.string_split([record]).values
            src = tf.string_to_number(src, out_type=tf.float32)
            return src, tf.constant([SOS], dtype=tf.int32)

        dataset = dataset.map(decode_record)
        dataset = dataset.batch(FLAGS.batch_size)
        iterator = dataset.make_one_shot_iterator()
        inputs, targets_inputs = iterator.get_next()
        assert inputs.shape.ndims == 2
        # assert targets_inputs.shape.ndims == 2
        return {
            'inputs': inputs,
            'targets_inputs': targets_inputs,
            'targets': None,
        }, None

    results = []
    result_iter = estimator.predict(infer_input_fn)
    for result in result_iter:
        output = result['output'].flatten()
        output = ' '.join(map(str, output))
        tf.logging.info('Inference results OUTPUT: %s' % output)
        results.append(output)

    if decode_to_file:
        output_filename = decode_to_file
    else:
        output_filename = '%s.result' % decode_from_file

    tf.logging.info('Writing results into {0}'.format(output_filename))
    with tf.gfile.Open(output_filename, 'w') as f:
        for res in results:
            f.write('%s\n' % (res))
Example #27
Source File: string_split_op_test.py From deep_image_model with Apache License 2.0

def testStringSplitEmptyToken(self):
    strings = [" hello ", "", "world "]

    with self.test_session() as sess:
        tokens = tf.string_split(strings)
        indices, values, shape = sess.run(tokens)
        self.assertAllEqual(indices, [[0, 0], [2, 0]])
        self.assertAllEqual(values, [b"hello", b"world"])
        self.assertAllEqual(shape, [3, 1])
Example #28
Source File: string_split_op_test.py From deep_image_model with Apache License 2.0

def testStringSplitEmptyDelimiter(self):
    strings = ["hello", "hola", b"\xF0\x9F\x98\x8E"]  # Last string is U+1F60E

    with self.test_session() as sess:
        tokens = tf.string_split(strings, delimiter="")
        indices, values, shape = sess.run(tokens)
        self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
                                      [1, 0], [1, 1], [1, 2], [1, 3],
                                      [2, 0], [2, 1], [2, 2], [2, 3]])
        expected = np.array(
            ['h', 'e', 'l', 'l', 'o', 'h', 'o', 'l', 'a', b'\xf0', b'\x9f',
             b'\x98', b'\x8e'],
            dtype='|S1')
        self.assertAllEqual(values.tolist(), expected)
        self.assertAllEqual(shape, [3, 5])
Example #29
Source File: string_split_op_test.py From deep_image_model with Apache License 2.0

def testStringSplit(self):
    strings = ["pigs on the wing", "animals"]

    with self.test_session() as sess:
        tokens = tf.string_split(strings)
        indices, values, shape = sess.run(tokens)
        self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
        self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
        self.assertAllEqual(shape, [2, 4])
Example #30
Source File: preprocessors.py From mead-baseline with Apache License 2.0

def lowercase(self, raw_post):
    split_chars = tf.string_split(tf.reshape(raw_post, [-1]), delimiter="").values
    upchar_inds = self.upchars_lut.lookup(split_chars)
    return tf.reduce_join(tf.map_fn(lambda x: tf.cond(x[0] > 25,
                                                      lambda: x[1],
                                                      lambda: self.lchars[x[0]]),
                                    (upchar_inds, split_chars),
                                    dtype=tf.string))