Python tensorflow.python.ops.data_flow_ops.RecordInput() Examples
The following are 12 code examples of tensorflow.python.ops.data_flow_ops.RecordInput(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.data_flow_ops, or try the search function.
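Before the individual examples, here is a minimal, hedged sketch of the pattern they all share: construct a RecordInput over a TFRecord file glob, call get_yield_op() to obtain a batch of serialized records, then split that batch into scalar strings for per-record parsing. The glob '/tmp/train-*', the parallelism and buffer_size values, and the batch size below are illustrative assumptions, not values taken from any of the projects listed.

# A minimal, graph-mode sketch of the RecordInput pattern used throughout the
# examples below. The file glob '/tmp/train-*' and the numeric values are
# placeholder assumptions, not taken from any of the listed projects.
import tensorflow as tf
from tensorflow.python.ops import data_flow_ops

batch_size = 32  # placeholder

with tf.Graph().as_default():
    record_input = data_flow_ops.RecordInput(
        file_pattern='/tmp/train-*',  # glob over TFRecord shards (assumed path)
        parallelism=16,               # reader threads
        buffer_size=1000,             # records buffered for shuffling
        batch_size=batch_size,
        name='record_input')

    # get_yield_op() yields a 1-D string tensor holding batch_size serialized
    # records; the examples then split it into scalar strings for parsing.
    records = record_input.get_yield_op()
    records = tf.split(records, batch_size, 0)
    records = [tf.reshape(record, []) for record in records]

Each example below elaborates on this skeleton with project-specific record parsing, preprocessing, and per-device batching.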
Example #1
Source File: preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch, records
Example #2
Source File: preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch, records
Example #3
Source File: preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch
Example #4
Source File: image_preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch
Example #5
Source File: preprocessing.py From parallax with Apache License 2.0 | 5 votes |
def __init__(self, height, width, batch_size, num_splits, dtype, train,
             distortions, resize_method, shift_ratio, summary_verbosity=0,
             distort_color_in_yiq=False, fuse_decode_and_crop=False):
  # Process images of this size. Depending on the model configuration, the
  # size of the input layer might differ from the original size of 32 x 32.
  self.height = height or 32
  self.width = width or 32
  self.depth = 3
  self.batch_size = batch_size
  self.num_splits = num_splits
  self.dtype = dtype
  self.train = train
  self.distortions = distortions
  self.shift_ratio = shift_ratio
  del distort_color_in_yiq
  del fuse_decode_and_crop
  del resize_method
  del shift_ratio  # unused, because a RecordInput is not used
  if self.batch_size % self.num_splits != 0:
    raise ValueError(
        ('batch_size must be a multiple of num_splits: '
         'batch_size %d, num_splits: %d') %
        (self.batch_size, self.num_splits))
  self.batch_size_per_split = self.batch_size // self.num_splits
  self.summary_verbosity = summary_verbosity
Example #6
Source File: preprocessing.py From deeplearning-benchmark with Apache License 2.0 | 5 votes |
def __init__(self, height, width, batch_size, num_splits, dtype, train,
             distortions, resize_method, shift_ratio, summary_verbosity=0,
             distort_color_in_yiq=False, fuse_decode_and_crop=False):
  # Process images of this size. Depending on the model configuration, the
  # size of the input layer might differ from the original size of 32 x 32.
  self.height = height or 32
  self.width = width or 32
  self.depth = 3
  self.batch_size = batch_size
  self.num_splits = num_splits
  self.dtype = dtype
  self.train = train
  self.distortions = distortions
  self.shift_ratio = shift_ratio
  del distort_color_in_yiq
  del fuse_decode_and_crop
  del resize_method
  del shift_ratio  # unused, because a RecordInput is not used
  if self.batch_size % self.num_splits != 0:
    raise ValueError(
        ('batch_size must be a multiple of num_splits: '
         'batch_size %d, num_splits: %d') %
        (self.batch_size, self.num_splits))
  self.batch_size_per_split = self.batch_size // self.num_splits
  self.summary_verbosity = summary_verbosity
Example #7
Source File: grasp_dataset.py From costar_plan with Apache License 2.0 | 5 votes |
def _get_tfrecord_path_glob_pattern(self, dataset=None):
    """Get the Glob string pattern for matching the specified dataset tfrecords.

    This will often be used in conjunction with the RecordInput class
    if you need a custom dataset loading function.

    # Arguments

        data_dir: The path to the folder containing the grasp dataset.
        dataset: The name of the dataset to download, downloads all by default
            with the '' parameter, 102 will download the 102 feature dataset
            found in grasp_listing.txt.
    """
    dataset = self._update_dataset_param(dataset)
    return os.path.join(os.path.expanduser(self.data_dir),
                        '*{}.tfrecord*'.format(dataset))
Example #8
Source File: grasp_dataset.py From costar_plan with Apache License 2.0 | 5 votes |
def _get_simple_parallel_dataset_ops(self, dataset=None, batch_size=1,
                                     buffer_size=300, parallelism=20,
                                     shift_ratio=0.01):
    """ Simple unordered & parallel TensorFlow ops that go through the whole dataset.

    # Returns

        A list of tuples ([(fixedLengthFeatureDict, sequenceFeatureDict)],
        features_complete_list, num_samples).
        fixedLengthFeatureDict maps from the feature strings of most features
            to their TF ops.
        sequenceFeatureDict maps from feature strings to time ordered sequences
            of poses transforming from the robot base to end effector.
        features_complete_list: a list of all feature strings in the
            fixedLengthFeatureDict and sequenceFeatureDict, and a parameter
            for get_time_ordered_features().
        num_samples: the number of samples in the dataset, used for configuring
            the size of one training epoch.
        shift_ratio: The order the files are read will be shifted each epoch
            by shift_amount so that the data is presented in a different order
            every epoch, 0 means the order always stays the same.
    """
    tf_glob = self._get_tfrecord_path_glob_pattern(dataset=dataset)
    record_input = data_flow_ops.RecordInput(tf_glob, batch_size,
                                             buffer_size, parallelism,
                                             shift_ratio=shift_ratio)
    records_op = record_input.get_yield_op()
    records_op = tf.split(records_op, batch_size, 0)
    records_op = [tf.reshape(record, []) for record in records_op]
    features_complete_list, num_samples = self.get_features()
    feature_op_dicts = [
        self._parse_grasp_attempt_protobuf(serialized_protobuf, features_complete_list)
        for serialized_protobuf in tqdm(records_op, desc='get_simple_parallel_dataset_ops.parse_protobuf')]
    # TODO(ahundt) https://www.tensorflow.org/performance/performance_models
    # make sure records are always ready to go on cpu and gpu via prefetching in a staging area
    # staging_area = tf.contrib.staging.StagingArea()
    dict_and_feature_tuple_list = []
    # Get all image features to finish extracting image data:
    # '/image/encoded', 'depth_image/decoded', 'xyz_image/decoded'
    image_features = GraspDataset.get_time_ordered_features(features_complete_list, '/image/encoded')
    image_features = np.append(image_features, GraspDataset.get_time_ordered_features(features_complete_list, 'depth_image/encoded'))
    for feature_op_dict, sequence_op_dict in tqdm(feature_op_dicts, desc='get_simple_parallel_dataset_ops.image_decode_batches'):
        new_feature_op_dict, new_feature_list = GraspDataset._image_decode(feature_op_dict, image_features=image_features)
        dict_and_feature_tuple_list.append((new_feature_op_dict, sequence_op_dict))
    # the new_feature_list should be the same for all the ops
    features_complete_list = np.append(features_complete_list, new_feature_list)

    return dict_and_feature_tuple_list, features_complete_list, num_samples
Example #9
Source File: nvcnn.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def device_minibatches(self, total_batch_size):
    record_input = data_flow_ops.RecordInput(
        file_pattern=os.path.join(FLAGS.data_dir, '%s-*' % self.subset),
        parallelism=64,
        # Note: This causes deadlock during init if larger than dataset
        buffer_size=FLAGS.input_buffer_size,
        batch_size=total_batch_size)
    records = record_input.get_yield_op()
    # Split batch into individual images
    records = tf.split(records, total_batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    # Deserialize and preprocess images into batches for each device
    images = defaultdict(list)
    labels = defaultdict(list)
    with tf.name_scope('input_pipeline'):
        for i, record in enumerate(records):
            imgdata, label, bbox, text = deserialize_image_record(record)
            image = self.preprocess(imgdata, bbox, thread_id=i)
            label -= 1  # Change to 0-based (don't use background class)
            device_num = i % self.num_devices
            images[device_num].append(image)
            labels[device_num].append(label)
        # Stack images back into a sub-batch for each device
        for device_num in range(self.num_devices):
            images[device_num] = tf.parallel_stack(images[device_num])
            labels[device_num] = tf.concat(labels[device_num], 0)
            images[device_num] = tf.reshape(images[device_num],
                                            [-1, self.height, self.width, 3])
            images[device_num] = tf.clip_by_value(images[device_num], 0., 255.)
            images[device_num] = tf.cast(images[device_num], self.dtype)
    return images, labels
Example #10
Source File: preprocessing.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def __init__(self, height, width, batch_size, num_splits, dtype, train,
             distortions, resize_method, shift_ratio, summary_verbosity=0,
             distort_color_in_yiq=False, fuse_decode_and_crop=False):
  # Process images of this size. Depending on the model configuration, the
  # size of the input layer might differ from the original size of 32 x 32.
  self.height = height or 32
  self.width = width or 32
  self.depth = 3
  self.batch_size = batch_size
  self.num_splits = num_splits
  self.dtype = dtype
  self.train = train
  self.distortions = distortions
  self.shift_ratio = shift_ratio
  del distort_color_in_yiq
  del fuse_decode_and_crop
  del resize_method
  del shift_ratio  # unused, because a RecordInput is not used
  if self.batch_size % self.num_splits != 0:
    raise ValueError(
        ('batch_size must be a multiple of num_splits: '
         'batch_size %d, num_splits: %d') %
        (self.batch_size, self.num_splits))
  self.batch_size_per_split = self.batch_size // self.num_splits
  self.summary_verbosity = summary_verbosity
Example #11
Source File: preprocessing.py From benchmarks with Apache License 2.0 | 4 votes |
def minibatch(self, dataset, subset, params, shift_ratio=-1):
  if shift_ratio < 0:
    shift_ratio = self.shift_ratio
  with tf.name_scope('batch_processing'):
    # Build final results per split.
    images = [[] for _ in range(self.num_splits)]
    labels = [[] for _ in range(self.num_splits)]
    if params.use_datasets:
      ds = self.create_dataset(
          self.batch_size, self.num_splits, self.batch_size_per_split,
          dataset, subset, self.train,
          datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
          num_threads=params.datasets_num_private_threads,
          datasets_use_caching=params.datasets_use_caching,
          datasets_parallel_interleave_cycle_length=(
              params.datasets_parallel_interleave_cycle_length),
          datasets_sloppy_parallel_interleave=(
              params.datasets_sloppy_parallel_interleave),
          datasets_parallel_interleave_prefetch=(
              params.datasets_parallel_interleave_prefetch))
      ds_iterator = self.create_iterator(ds)
      for d in xrange(self.num_splits):
        images[d], labels[d] = ds_iterator.get_next()

    # TODO(laigd): consider removing the --use_datasets option, it should
    # always use datasets.
    else:
      record_input = data_flow_ops.RecordInput(
          file_pattern=dataset.tf_record_pattern(subset),
          seed=301,
          parallelism=64,
          buffer_size=10000,
          batch_size=self.batch_size,
          shift_ratio=shift_ratio,
          name='record_input')
      records = record_input.get_yield_op()
      records = tf.split(records, self.batch_size, 0)
      records = [tf.reshape(record, []) for record in records]
      for idx in xrange(self.batch_size):
        value = records[idx]
        (image, label) = self.parse_and_preprocess(value, idx)
        split_index = idx % self.num_splits
        labels[split_index].append(label)
        images[split_index].append(image)

    for split_index in xrange(self.num_splits):
      if not params.use_datasets:
        images[split_index] = tf.parallel_stack(images[split_index])
        labels[split_index] = tf.concat(labels[split_index], 0)
      images[split_index] = tf.reshape(
          images[split_index],
          shape=[self.batch_size_per_split, self.height, self.width,
                 self.depth])
      labels[split_index] = tf.reshape(labels[split_index],
                                       [self.batch_size_per_split])
    return images, labels
Example #12
Source File: resnet_common.py From keras_experiments with The Unlicense | 4 votes |
def device_minibatches(cls, num_devices, data_dir, total_batch_size,
                       height, width, distort_color, val=False):
    dtype = tf.float32
    subset = 'validation' if val else 'train'

    nrecord = get_num_records(os.path.join(
        data_dir, '{}-*'.format(subset)))
    input_buffer_size = min(10000, nrecord)

    record_input = data_flow_ops.RecordInput(
        file_pattern=os.path.join(data_dir, '{}-*'.format(subset)),
        parallelism=64,
        # Note: This causes deadlock during init if
        # larger than dataset
        buffer_size=input_buffer_size,
        batch_size=total_batch_size,
        seed=0)

    records = record_input.get_yield_op()

    # Split batch into individual images
    records = tf.split(records, total_batch_size, 0)
    records = [tf.reshape(record, []) for record in records]

    # Deserialize and preprocess images into batches for each device
    images = defaultdict(list)
    labels = defaultdict(list)
    with tf.name_scope('input_pipeline'):
        for thread_id, record in enumerate(records):
            imgdata, label, bbox, _ = cls._deserialize_image_record(record)
            image = cls._preprocess(
                imgdata, bbox, thread_id, height, width, distort_color,
                val=val)
            label -= 1  # Change to 0-based (don't use background class)
            device_num = thread_id % num_devices
            images[device_num].append(image)
            labels[device_num].append(label)

        # Stack images back into a sub-batch for each device
        for device_num in xrange(num_devices):
            images[device_num] = tf.parallel_stack(images[device_num])
            labels[device_num] = tf.concat(labels[device_num], 0)
            images[device_num] = tf.reshape(
                images[device_num], [-1, height, width, 3])
            images[device_num] = tf.clip_by_value(
                images[device_num], 0., 255.)
            images[device_num] = tf.cast(images[device_num], dtype)

    return images, labels, nrecord