Python tensorflow.python.ops.data_flow_ops.RecordInput() Examples
The following are 12 code examples of tensorflow.python.ops.data_flow_ops.RecordInput(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.data_flow_ops, or try the search function.
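Before the individual examples, here is a minimal, hedged sketch of the pattern they all share: construct a RecordInput over a TFRecord file glob, call get_yield_op() to obtain a batch of serialized records, then split that batch into scalar strings for per-record parsing. The glob '/tmp/train-*', the parallelism and buffer_size values, and the batch size below are illustrative assumptions, not values taken from any of the projects listed.

# A minimal, graph-mode sketch of the RecordInput pattern used throughout the
# examples below. The file glob '/tmp/train-*' and the numeric values are
# placeholder assumptions, not taken from any of the listed projects.
import tensorflow as tf
from tensorflow.python.ops import data_flow_ops

batch_size = 32  # placeholder

with tf.Graph().as_default():
    record_input = data_flow_ops.RecordInput(
        file_pattern='/tmp/train-*',  # glob over TFRecord shards (assumed path)
        parallelism=16,               # reader threads
        buffer_size=1000,             # records buffered for shuffling
        batch_size=batch_size,
        name='record_input')

    # get_yield_op() yields a 1-D string tensor holding batch_size serialized
    # records; the examples then split it into scalar strings for parsing.
    records = record_input.get_yield_op()
    records = tf.split(records, batch_size, 0)
    records = [tf.reshape(record, []) for record in records]

Each example below elaborates on this skeleton with project-specific record parsing, preprocessing, and per-device batching.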
Example #1
Source File: preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch, records
Example #2
Source File: preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch, records
Example #3
Source File: preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch
Example #4
Source File: image_preprocessing.py From models with Apache License 2.0 | 5 votes |
def minibatch(self, dataset, subset):
  with tf.compat.v1.name_scope('batch_processing'):
    images = [[] for i in range(self.device_count)]
    labels = [[] for i in range(self.device_count)]
    record_input = data_flow_ops.RecordInput(
        file_pattern=dataset.tf_record_pattern(subset),
        seed=randint(0, 9000),
        parallelism=64,
        buffer_size=10000,
        batch_size=self.batch_size,
        name='record_input')
    records = record_input.get_yield_op()
    records = tf.split(records, self.batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    for i in xrange(self.batch_size):
      value = records[i]
      image_buffer, label_index, bbox, _ = parse_example_proto(value)
      image = self.preprocess(image_buffer, bbox, i % 4)
      device_index = i % self.device_count
      images[device_index].append(image)
      labels[device_index].append(label_index)
    label_index_batch = [None] * self.device_count
    for device_index in xrange(self.device_count):
      images[device_index] = tf.parallel_stack(images[device_index])
      label_index_batch[device_index] = tf.concat(labels[device_index], 0)
      # dynamic_pad=True)  # HACK TESTING dynamic_pad=True
      images[device_index] = tf.cast(images[device_index], self.dtype)
      depth = 3
      images[device_index] = tf.reshape(
          images[device_index],
          shape=[self.batch_size_per_device, self.height, self.width, depth])
      label_index_batch[device_index] = tf.reshape(
          label_index_batch[device_index], [self.batch_size_per_device])
      # Display the training images in the visualizer.
      # tf.summary.image('images', images)
    return images, label_index_batch
Example #5
Source File: preprocessing.py From parallax with Apache License 2.0 | 5 votes |
def __init__(self, height, width, batch_size, num_splits, dtype, train,
             distortions, resize_method, shift_ratio, summary_verbosity=0,
             distort_color_in_yiq=False, fuse_decode_and_crop=False):
  # Process images of this size. Depending on the model configuration, the
  # size of the input layer might differ from the original size of 32 x 32.
  self.height = height or 32
  self.width = width or 32
  self.depth = 3
  self.batch_size = batch_size
  self.num_splits = num_splits
  self.dtype = dtype
  self.train = train
  self.distortions = distortions
  self.shift_ratio = shift_ratio
  del distort_color_in_yiq
  del fuse_decode_and_crop
  del resize_method
  del shift_ratio  # unused, because a RecordInput is not used
  if self.batch_size % self.num_splits != 0:
    raise ValueError(
        ('batch_size must be a multiple of num_splits: '
         'batch_size %d, num_splits: %d') %
        (self.batch_size, self.num_splits))
  self.batch_size_per_split = self.batch_size // self.num_splits
  self.summary_verbosity = summary_verbosity
Example #6
Source File: preprocessing.py From deeplearning-benchmark with Apache License 2.0 | 5 votes |
def __init__(self, height, width, batch_size, num_splits, dtype, train,
             distortions, resize_method, shift_ratio, summary_verbosity=0,
             distort_color_in_yiq=False, fuse_decode_and_crop=False):
  # Process images of this size. Depending on the model configuration, the
  # size of the input layer might differ from the original size of 32 x 32.
  self.height = height or 32
  self.width = width or 32
  self.depth = 3
  self.batch_size = batch_size
  self.num_splits = num_splits
  self.dtype = dtype
  self.train = train
  self.distortions = distortions
  self.shift_ratio = shift_ratio
  del distort_color_in_yiq
  del fuse_decode_and_crop
  del resize_method
  del shift_ratio  # unused, because a RecordInput is not used
  if self.batch_size % self.num_splits != 0:
    raise ValueError(
        ('batch_size must be a multiple of num_splits: '
         'batch_size %d, num_splits: %d') %
        (self.batch_size, self.num_splits))
  self.batch_size_per_split = self.batch_size // self.num_splits
  self.summary_verbosity = summary_verbosity
Example #7
Source File: grasp_dataset.py From costar_plan with Apache License 2.0 | 5 votes |
def _get_tfrecord_path_glob_pattern(self, dataset=None):
    """Get the Glob string pattern for matching the specified dataset tfrecords.

    This will often be used in conjunction with the RecordInput class
    if you need a custom dataset loading function.

    # Arguments

        data_dir: The path to the folder containing the grasp dataset.
        dataset: The name of the dataset to download, downloads all by default
            with the '' parameter, 102 will download the 102 feature dataset
            found in grasp_listing.txt.
    """
    dataset = self._update_dataset_param(dataset)
    return os.path.join(os.path.expanduser(self.data_dir),
                        '*{}.tfrecord*'.format(dataset))
Example #8
Source File: grasp_dataset.py From costar_plan with Apache License 2.0 | 5 votes |
def _get_simple_parallel_dataset_ops(self, dataset=None, batch_size=1,
                                     buffer_size=300, parallelism=20,
                                     shift_ratio=0.01):
    """ Simple unordered & parallel TensorFlow ops that go through the whole dataset.

    # Returns

        A list of tuples ([(fixedLengthFeatureDict, sequenceFeatureDict)],
        features_complete_list, num_samples).
        fixedLengthFeatureDict maps from the feature strings of most features
            to their TF ops.
        sequenceFeatureDict maps from feature strings to time ordered sequences
            of poses transforming from the robot base to end effector.
        features_complete_list: a list of all feature strings in the
            fixedLengthFeatureDict and sequenceFeatureDict, and a parameter
            for get_time_ordered_features().
        num_samples: the number of samples in the dataset, used for configuring
            the size of one training epoch.
        shift_ratio: The order the files are read will be shifted each epoch
            by shift_amount so that the data is presented in a different order
            every epoch, 0 means the order always stays the same.
    """
    tf_glob = self._get_tfrecord_path_glob_pattern(dataset=dataset)
    record_input = data_flow_ops.RecordInput(tf_glob, batch_size,
                                             buffer_size, parallelism,
                                             shift_ratio=shift_ratio)
    records_op = record_input.get_yield_op()
    records_op = tf.split(records_op, batch_size, 0)
    records_op = [tf.reshape(record, []) for record in records_op]
    features_complete_list, num_samples = self.get_features()
    feature_op_dicts = [
        self._parse_grasp_attempt_protobuf(serialized_protobuf, features_complete_list)
        for serialized_protobuf in tqdm(records_op, desc='get_simple_parallel_dataset_ops.parse_protobuf')]
    # TODO(ahundt) https://www.tensorflow.org/performance/performance_models
    # make sure records are always ready to go on cpu and gpu via prefetching in a staging area
    # staging_area = tf.contrib.staging.StagingArea()
    dict_and_feature_tuple_list = []
    # Get all image features to finish extracting image data:
    # '/image/encoded', 'depth_image/decoded', 'xyz_image/decoded'
    image_features = GraspDataset.get_time_ordered_features(features_complete_list, '/image/encoded')
    image_features = np.append(image_features, GraspDataset.get_time_ordered_features(features_complete_list, 'depth_image/encoded'))
    for feature_op_dict, sequence_op_dict in tqdm(feature_op_dicts, desc='get_simple_parallel_dataset_ops.image_decode_batches'):
        new_feature_op_dict, new_feature_list = GraspDataset._image_decode(feature_op_dict, image_features=image_features)
        dict_and_feature_tuple_list.append((new_feature_op_dict, sequence_op_dict))
    # the new_feature_list should be the same for all the ops
    features_complete_list = np.append(features_complete_list, new_feature_list)

    return dict_and_feature_tuple_list, features_complete_list, num_samples
Example #9
Source File: nvcnn.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def device_minibatches(self, total_batch_size):
    record_input = data_flow_ops.RecordInput(
        file_pattern=os.path.join(FLAGS.data_dir, '%s-*' % self.subset),
        parallelism=64,
        # Note: This causes deadlock during init if larger than dataset
        buffer_size=FLAGS.input_buffer_size,
        batch_size=total_batch_size)
    records = record_input.get_yield_op()
    # Split batch into individual images
    records = tf.split(records, total_batch_size, 0)
    records = [tf.reshape(record, []) for record in records]
    # Deserialize and preprocess images into batches for each device
    images = defaultdict(list)
    labels = defaultdict(list)
    with tf.name_scope('input_pipeline'):
        for i, record in enumerate(records):
            imgdata, label, bbox, text = deserialize_image_record(record)
            image = self.preprocess(imgdata, bbox, thread_id=i)
            label -= 1  # Change to 0-based (don't use background class)
            device_num = i % self.num_devices
            images[device_num].append(image)
            labels[device_num].append(label)
        # Stack images back into a sub-batch for each device
        for device_num in range(self.num_devices):
            images[device_num] = tf.parallel_stack(images[device_num])
            labels[device_num] = tf.concat(labels[device_num], 0)
            images[device_num] = tf.reshape(images[device_num],
                                            [-1, self.height, self.width, 3])
            images[device_num] = tf.clip_by_value(images[device_num], 0., 255.)
            images[device_num] = tf.cast(images[device_num], self.dtype)
    return images, labels
Example #10
Source File: preprocessing.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def __init__(self, height, width, batch_size, num_splits, dtype, train,
             distortions, resize_method, shift_ratio, summary_verbosity=0,
             distort_color_in_yiq=False, fuse_decode_and_crop=False):
  # Process images of this size. Depending on the model configuration, the
  # size of the input layer might differ from the original size of 32 x 32.
  self.height = height or 32
  self.width = width or 32
  self.depth = 3
  self.batch_size = batch_size
  self.num_splits = num_splits
  self.dtype = dtype
  self.train = train
  self.distortions = distortions
  self.shift_ratio = shift_ratio
  del distort_color_in_yiq
  del fuse_decode_and_crop
  del resize_method
  del shift_ratio  # unused, because a RecordInput is not used
  if self.batch_size % self.num_splits != 0:
    raise ValueError(
        ('batch_size must be a multiple of num_splits: '
         'batch_size %d, num_splits: %d') %
        (self.batch_size, self.num_splits))
  self.batch_size_per_split = self.batch_size // self.num_splits
  self.summary_verbosity = summary_verbosity
Example #11
Source File: preprocessing.py From benchmarks with Apache License 2.0 | 4 votes |
def minibatch(self, dataset, subset, params, shift_ratio=-1):
  if shift_ratio < 0:
    shift_ratio = self.shift_ratio
  with tf.name_scope('batch_processing'):
    # Build final results per split.
    images = [[] for _ in range(self.num_splits)]
    labels = [[] for _ in range(self.num_splits)]
    if params.use_datasets:
      ds = self.create_dataset(
          self.batch_size, self.num_splits, self.batch_size_per_split,
          dataset, subset, self.train,
          datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
          num_threads=params.datasets_num_private_threads,
          datasets_use_caching=params.datasets_use_caching,
          datasets_parallel_interleave_cycle_length=(
              params.datasets_parallel_interleave_cycle_length),
          datasets_sloppy_parallel_interleave=(
              params.datasets_sloppy_parallel_interleave),
          datasets_parallel_interleave_prefetch=(
              params.datasets_parallel_interleave_prefetch))
      ds_iterator = self.create_iterator(ds)
      for d in xrange(self.num_splits):
        images[d], labels[d] = ds_iterator.get_next()

    # TODO(laigd): consider removing the --use_datasets option, it should
    # always use datasets.
    else:
      record_input = data_flow_ops.RecordInput(
          file_pattern=dataset.tf_record_pattern(subset),
          seed=301,
          parallelism=64,
          buffer_size=10000,
          batch_size=self.batch_size,
          shift_ratio=shift_ratio,
          name='record_input')
      records = record_input.get_yield_op()
      records = tf.split(records, self.batch_size, 0)
      records = [tf.reshape(record, []) for record in records]
      for idx in xrange(self.batch_size):
        value = records[idx]
        (image, label) = self.parse_and_preprocess(value, idx)
        split_index = idx % self.num_splits
        labels[split_index].append(label)
        images[split_index].append(image)

    for split_index in xrange(self.num_splits):
      if not params.use_datasets:
        images[split_index] = tf.parallel_stack(images[split_index])
        labels[split_index] = tf.concat(labels[split_index], 0)
      images[split_index] = tf.reshape(
          images[split_index],
          shape=[self.batch_size_per_split, self.height, self.width,
                 self.depth])
      labels[split_index] = tf.reshape(labels[split_index],
                                       [self.batch_size_per_split])
    return images, labels
Example #12
Source File: resnet_common.py From keras_experiments with The Unlicense | 4 votes |
def device_minibatches(cls, num_devices, data_dir, total_batch_size,
                       height, width, distort_color, val=False):
    dtype = tf.float32
    subset = 'validation' if val else 'train'

    nrecord = get_num_records(os.path.join(
        data_dir, '{}-*'.format(subset)))
    input_buffer_size = min(10000, nrecord)

    record_input = data_flow_ops.RecordInput(
        file_pattern=os.path.join(data_dir, '{}-*'.format(subset)),
        parallelism=64,
        # Note: This causes deadlock during init if
        # larger than dataset
        buffer_size=input_buffer_size,
        batch_size=total_batch_size,
        seed=0)

    records = record_input.get_yield_op()

    # Split batch into individual images
    records = tf.split(records, total_batch_size, 0)
    records = [tf.reshape(record, []) for record in records]

    # Deserialize and preprocess images into batches for each device
    images = defaultdict(list)
    labels = defaultdict(list)
    with tf.name_scope('input_pipeline'):
        for thread_id, record in enumerate(records):
            imgdata, label, bbox, _ = cls._deserialize_image_record(record)
            image = cls._preprocess(
                imgdata, bbox, thread_id, height, width, distort_color,
                val=val)
            label -= 1  # Change to 0-based (don't use background class)
            device_num = thread_id % num_devices
            images[device_num].append(image)
            labels[device_num].append(label)

        # Stack images back into a sub-batch for each device
        for device_num in xrange(num_devices):
            images[device_num] = tf.parallel_stack(images[device_num])
            labels[device_num] = tf.concat(labels[device_num], 0)
            images[device_num] = tf.reshape(
                images[device_num], [-1, height, width, 3])
            images[device_num] = tf.clip_by_value(
                images[device_num], 0., 255.)
            images[device_num] = tf.cast(images[device_num], dtype)

    return images, labels, nrecord