Python tensorflow.matching_files() Examples
The following are 16 code examples of tensorflow.matching_files(), each taken from an open-source project; the originating source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the tensorflow module.
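tf.matching_files takes a glob pattern (or a tensor of patterns) and returns a 1-D string tensor containing the names of all matching files; in TensorFlow 1.x graph mode the file system is only consulted when the op is evaluated. A minimal usage sketch, assuming TensorFlow 1.x and a hypothetical directory of CSV shards matching data/train-*.csv:

import tensorflow as tf  # TensorFlow 1.x API, as used in the examples below

# Building the op does not touch the file system yet in graph mode.
filenames = tf.matching_files('data/train-*.csv')  # hypothetical pattern

with tf.Session() as sess:
    # Running the op performs the glob and returns a 1-D tensor of byte strings.
    print(sess.run(filenames))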
Example #1
Source File: input_pipeline_dask.py From professional-services with Apache License 2.0
def input_fn(self, name, csv_path=None):
    """Creates a dataset object for the model to consume. Input function for estimator

    Arguments:
        name : string, Name of the data [Train or Eval]
        csv_path : The path of the csv on any storage system

    Returns:
        features : tf.data.TextLineDataset object, Dataset containing batch of features
        labels : tf.data.TextLineDataset object, Dataset containing batch of labels
    """
    pattern = self._get_pattern(name, csv_path)
    tf.logging.info('The Pattern of files is : %s', pattern)
    filenames = tf.matching_files(pattern=pattern)
    dataset = tf.data.TextLineDataset(filenames).skip(1).map(
        self.parse_csv, num_parallel_calls=cpu_count())
    dataset = dataset.shuffle(buffer_size=self.batch_size * 100)
    dataset = dataset.apply(tf.contrib.data.ignore_errors())
    dataset = dataset.repeat(self.num_epochs)
    dataset = dataset.batch(self.batch_size)
    # determine the ideal number
    dataset = dataset.prefetch(self.buffer_size)
    iterator = dataset.make_one_shot_iterator()
    feats, labs = iterator.get_next()
    return feats, labs
Example #2
Source File: dataset.py From keras_imagenet with MIT License
def get_dataset(tfrecords_dir, subset, batch_size):
    """Read TFRecords files and turn them into a TFRecordDataset."""
    files = tf.matching_files(os.path.join(tfrecords_dir, '%s-*' % subset))
    shards = tf.data.Dataset.from_tensor_slices(files)
    shards = shards.shuffle(tf.cast(tf.shape(files)[0], tf.int64))
    shards = shards.repeat()
    dataset = shards.interleave(tf.data.TFRecordDataset, cycle_length=4)
    dataset = dataset.shuffle(buffer_size=8192)
    parser = partial(
        _parse_fn, is_training=True if subset == 'train' else False)
    dataset = dataset.apply(
        tf.data.experimental.map_and_batch(
            map_func=parser,
            batch_size=batch_size,
            num_parallel_calls=config.NUM_DATA_WORKERS))
    dataset = dataset.prefetch(batch_size)
    return dataset
Example #3
Source File: dataset_util.py From ros_people_object_detection_tensorflow with Apache License 2.0
def read_dataset(file_read_func, decode_func, input_files, config):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if config.shuffle:
        filename_dataset = filename_dataset.shuffle(
            config.filenames_shuffle_buffer_size)
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)
    records_dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            file_read_func, cycle_length=config.num_readers, sloppy=True))
    if config.shuffle:
        records_dataset.shuffle(config.shuffle_buffer_size)
    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return tensor_dataset.prefetch(config.prefetch_size)
Example #4
Source File: dataset_util.py From Gun-Detector with Apache License 2.0
def read_dataset(file_read_func, decode_func, input_files, config):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if config.shuffle:
        filename_dataset = filename_dataset.shuffle(
            config.filenames_shuffle_buffer_size)
    elif config.num_readers > 1:
        tf.logging.warning('`shuffle` is false, but the input data stream is '
                           'still slightly shuffled since `num_readers` > 1.')
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)
    records_dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            file_read_func,
            cycle_length=config.num_readers,
            block_length=config.read_block_length,
            sloppy=True))
    if config.shuffle:
        records_dataset.shuffle(config.shuffle_buffer_size)
    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return tensor_dataset.prefetch(config.prefetch_size)
Example #5
Source File: io_ops_test.py From deep_image_model with Apache License 2.0
def testMatchingFiles(self):
    cases = ['ABcDEF.GH', 'ABzDEF.GH', 'ABasdfjklDEF.GH',
             'AB3DEF.GH', 'AB4DEF.GH', 'ABDEF.GH', 'XYZ']
    files = [tempfile.NamedTemporaryFile(
        prefix=c, dir=self.get_temp_dir()) for c in cases]

    with self.test_session():
        # Test exact match without wildcards.
        for f in files:
            self.assertEqual(tf.matching_files(f.name).eval(),
                             tf.compat.as_bytes(f.name))

        # We will look for files matching "ABxDEF.GH*" where "x" is some wildcard.
        pos = files[0].name.find(cases[0])
        pattern = files[0].name[:pos] + 'AB%sDEF.GH*'

        self.assertEqual(set(tf.matching_files(pattern % 'z').eval()),
                         self._subset(files, [1]))
        self.assertEqual(set(tf.matching_files(pattern % '?').eval()),
                         self._subset(files, [0, 1, 3, 4]))
        self.assertEqual(set(tf.matching_files(pattern % '*').eval()),
                         self._subset(files, [0, 1, 2, 3, 4, 5]))
        self.assertEqual(set(tf.matching_files(pattern % '[cxz]').eval()),
                         self._subset(files, [0, 1]))
        self.assertEqual(set(tf.matching_files(pattern % '[0-9]').eval()),
                         self._subset(files, [3, 4]))
Example #6
Source File: dataset_util.py From ros_tensorflow with Apache License 2.0
def read_dataset(file_read_func, decode_func, input_files, config):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if config.shuffle:
        filename_dataset = filename_dataset.shuffle(
            config.filenames_shuffle_buffer_size)
    elif config.num_readers > 1:
        tf.logging.warning('`shuffle` is false, but the input data stream is '
                           'still slightly shuffled since `num_readers` > 1.')
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)
    records_dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            file_read_func,
            cycle_length=config.num_readers,
            block_length=config.read_block_length,
            sloppy=True))
    if config.shuffle:
        records_dataset.shuffle(config.shuffle_buffer_size)
    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return tensor_dataset.prefetch(config.prefetch_size)
Example #7
Source File: input_util.py From professional-services with Apache License 2.0
def input_fn(input_dir,
             mode,
             batch_size,
             num_epochs,
             label_name=None,
             shuffle_buffer_size=10000,
             feature_spec=None):
    """Reads TFRecords and returns the features and labels."""
    if feature_spec is None:
        tf_transform_output = tft.TFTransformOutput(
            os.path.join(input_dir, 'transformed_metadata'))
        feature_spec = tf_transform_output.transformed_feature_spec()

    prefix = str(mode).lower()
    suffix = '.tfrecord'
    num_cpus = multiprocessing.cpu_count()

    file_pattern = os.path.join(input_dir, 'data', prefix, prefix + '*' + suffix)
    filenames = tf.matching_files(file_pattern)
    dataset = tf.data.TFRecordDataset(filenames=filenames,
                                      buffer_size=None,
                                      num_parallel_reads=num_cpus)

    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.shuffle(shuffle_buffer_size)

    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(
        lambda examples: tf.parse_example(examples, feature_spec))
    iterator = dataset.make_one_shot_iterator()
    features = iterator.get_next()
    if mode == tf.estimator.ModeKeys.PREDICT:
        return features
    label = features.pop(label_name)
    return features, label
Example #8
Source File: data_utils.py From ID-CNN-CWS with GNU General Public License v3.0
def __init__(self, in_pattern, batch_size, num_buckets=0, num_epochs=None):
    self._batch_size = batch_size
    self.num_buckets = num_buckets
    self._epoch = 0
    self._step = 1.
    self.num_epochs = num_epochs
    file_pattern = in_pattern + '/examples.proto' if os.path.isdir(in_pattern) else in_pattern
    filenames = tf.matching_files(file_pattern)
    # filenames = tf.Print(filenames, [filenames], message='filenames: ')
    self.next_batch_op = self.input_pipeline(filenames, self._batch_size,
                                             self.num_buckets, self.num_epochs)
Example #9
Source File: data_utils.py From bran with Apache License 2.0
def input_pipeline(self, file_pattern, batch_size, num_epochs=None, num_threads=10):
    filenames = tf.matching_files(file_pattern)
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs, shuffle=True)
    parsed_batch = self.example_parser(filename_queue)
    min_after_dequeue = 10000
    capacity = min_after_dequeue + 12 * batch_size
    next_batch = tf.train.batch(
        parsed_batch,
        batch_size=batch_size,
        capacity=capacity,
        num_threads=num_threads,
        dynamic_pad=True,
        allow_smaller_final_batch=True)
    return next_batch
Example #10
Source File: dataset_util.py From Elphas with Apache License 2.0
def read_dataset(file_read_func, decode_func, input_files, config):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if config.shuffle:
        filename_dataset = filename_dataset.shuffle(
            config.filenames_shuffle_buffer_size)
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)
    records_dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            file_read_func, cycle_length=config.num_readers, sloppy=True))
    if config.shuffle:
        records_dataset.shuffle(config.shuffle_buffer_size)
    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return tensor_dataset.prefetch(config.prefetch_size)
Example #11
Source File: dataset_util.py From AniSeg with Apache License 2.0
def read_dataset(file_read_func, decode_func, input_files, config):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if config.shuffle:
        filename_dataset = filename_dataset.shuffle(
            config.filenames_shuffle_buffer_size)
    filename_dataset = filename_dataset.repeat(config.num_epochs or None)
    records_dataset = filename_dataset.apply(
        tf.contrib.data.parallel_interleave(
            file_read_func, cycle_length=config.num_readers, sloppy=True))
    if config.shuffle:
        records_dataset.shuffle(config.shuffle_buffer_size)
    tensor_dataset = records_dataset.map(
        decode_func, num_parallel_calls=config.num_parallel_map_calls)
    return tensor_dataset.prefetch(config.prefetch_size)
Example #12
Source File: dataset_util.py From Traffic-Rule-Violation-Detection-System with MIT License
def read_dataset(file_read_func, decode_func, input_files, config,
                 num_workers=1, worker_index=0):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.
        num_workers: Number of workers / shards.
        worker_index: Id for the current worker.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.shard(num_workers, worker_index)
    dataset = dataset.repeat(config.num_epochs or None)
    if config.shuffle:
        dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    # Read file records and shuffle them.
    # If cycle_length is larger than the number of files, more than one reader
    # will be assigned to the same file, leading to repetition.
    cycle_length = tf.cast(
        tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
    # TODO: find the optimal block_length.
    dataset = dataset.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)

    if config.shuffle:
        dataset = dataset.shuffle(config.shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
    return dataset.prefetch(config.prefetch_buffer_size)
Example #13
Source File: dataset_util.py From deepglobe_land_cover_classification_with_deeplabv3plus with MIT License
def read_dataset(file_read_func, decode_func, input_files, config,
                 num_workers=1, worker_index=0):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.
        num_workers: Number of workers / shards.
        worker_index: Id for the current worker.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.shard(num_workers, worker_index)
    dataset = dataset.repeat(config.num_epochs or None)
    if config.shuffle:
        dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    # Read file records and shuffle them.
    # If cycle_length is larger than the number of files, more than one reader
    # will be assigned to the same file, leading to repetition.
    cycle_length = tf.cast(
        tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
    # TODO: find the optimal block_length.
    dataset = dataset.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)

    if config.shuffle:
        dataset = dataset.shuffle(config.shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
    return dataset.prefetch(config.prefetch_buffer_size)
Example #14
Source File: dataset_util.py From tensorflow-deeplab-v3 with MIT License
def read_dataset(file_read_func, decode_func, input_files, config,
                 num_workers=1, worker_index=0):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.
        num_workers: Number of workers / shards.
        worker_index: Id for the current worker.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.shard(num_workers, worker_index)
    dataset = dataset.repeat(config.num_epochs or None)
    if config.shuffle:
        dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    # Read file records and shuffle them.
    # If cycle_length is larger than the number of files, more than one reader
    # will be assigned to the same file, leading to repetition.
    cycle_length = tf.cast(
        tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
    # TODO: find the optimal block_length.
    dataset = dataset.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)

    if config.shuffle:
        dataset = dataset.shuffle(config.shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
    return dataset.prefetch(config.prefetch_buffer_size)
Example #15
Source File: dataset_util.py From tensorflow-deeplab-v3-plus with MIT License
def read_dataset(file_read_func, decode_func, input_files, config,
                 num_workers=1, worker_index=0):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.
        num_workers: Number of workers / shards.
        worker_index: Id for the current worker.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.shard(num_workers, worker_index)
    dataset = dataset.repeat(config.num_epochs or None)
    if config.shuffle:
        dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    # Read file records and shuffle them.
    # If cycle_length is larger than the number of files, more than one reader
    # will be assigned to the same file, leading to repetition.
    cycle_length = tf.cast(
        tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
    # TODO: find the optimal block_length.
    dataset = dataset.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)

    if config.shuffle:
        dataset = dataset.shuffle(config.shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
    return dataset.prefetch(config.prefetch_buffer_size)
Example #16
Source File: dataset_util.py From LaneSegmentationNetwork with GNU Lesser General Public License v3.0
def read_dataset(file_read_func, decode_func, input_files, config,
                 num_workers=1, worker_index=0):
    """Reads a dataset, and handles repetition and shuffling.

    Args:
        file_read_func: Function to use in tf.data.Dataset.interleave, to read
            every individual file into a tf.data.Dataset.
        decode_func: Function to apply to all records.
        input_files: A list of file paths to read.
        config: A input_reader_builder.InputReader object.
        num_workers: Number of workers / shards.
        worker_index: Id for the current worker.

    Returns:
        A tf.data.Dataset based on config.
    """
    # Shard, shuffle, and read files.
    filenames = tf.concat(
        [tf.matching_files(pattern) for pattern in input_files], 0)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.shard(num_workers, worker_index)
    dataset = dataset.repeat(config.num_epochs or None)
    if config.shuffle:
        dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    # Read file records and shuffle them.
    # If cycle_length is larger than the number of files, more than one reader
    # will be assigned to the same file, leading to repetition.
    cycle_length = tf.cast(
        tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
    # TODO: find the optimal block_length.
    dataset = dataset.interleave(
        file_read_func, cycle_length=cycle_length, block_length=1)

    if config.shuffle:
        dataset = dataset.shuffle(config.shuffle_buffer_size,
                                  reshuffle_each_iteration=True)

    dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
    return dataset.prefetch(config.prefetch_buffer_size)
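Note that all of the examples above use the TensorFlow 1.x API (tf.matching_files, tf.contrib, tf.logging, one-shot iterators). In TensorFlow 2.x the same op is exposed as tf.io.matching_files, and tf.data.Dataset.list_files wraps the same globbing for input pipelines. A rough sketch of an equivalent TFRecord pipeline under a recent TF 2.x release, using a hypothetical shard pattern:

import tensorflow as tf  # TensorFlow 2.x

# tf.io.matching_files is the 2.x name for the same file-globbing op.
files = tf.io.matching_files('tfrecords/train-*')  # hypothetical pattern

# Dataset.list_files performs the same glob and can shuffle the file order.
dataset = tf.data.Dataset.list_files('tfrecords/train-*', shuffle=True)
dataset = dataset.interleave(tf.data.TFRecordDataset,
                             cycle_length=4,
                             num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.shuffle(8192).batch(32).prefetch(tf.data.AUTOTUNE)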