Python tensorflow.decode_csv() Examples

The following are 30 code examples of tensorflow.decode_csv(), collected from open-source projects. The source file and project for each example are noted above it. You may also want to check out all available functions and classes of the tensorflow module. (In TensorFlow 1.x this op is available as tf.decode_csv; in 2.x it lives at tf.io.decode_csv.)
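Before the examples, a minimal self-contained sketch of the call may help (TF 1.x graph mode; the record contents here are invented for illustration). Each entry of record_defaults fixes the dtype of the corresponding output column, and an empty list would mark that column as required:

import tensorflow as tf

# One CSV record and per-column defaults; each default's dtype determines
# the dtype of the decoded column (string, int32, float32 here).
record = tf.constant("alice,32,1.75")
name, age, height = tf.decode_csv(record, record_defaults=[[""], [0], [0.0]])

with tf.Session() as sess:
    print(sess.run([name, age, height]))  # [b'alice', 32, 1.75]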
Example #1
Source File: PASCALVOC2012Classification.py    From dynamic-training-bench with Mozilla Public License 2.0
def _read_image_and_box(self, bboxes_csv):
        """Extract the filename from the queue, read the image and
        produce a single box
        Returns:
            image, box
        """

        reader = tf.TextLineReader(skip_header_lines=True)
        _, row = reader.read(bboxes_csv)
        # file, y_min, x_min, y_max, x_max, label
        record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
        # eg:
        # 2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0
        filename, y_min, x_min, y_max, x_max, label = tf.decode_csv(
            row, record_defaults)
        image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                                  'JPEGImages') + "/" + filename + ".jpg"

        # image is normalized in [-1,1] and converted to self._image_depth channels
        image = read_image_jpg(image_path, depth=self._image_depth)
        return image, tf.stack([y_min, x_min, y_max, x_max, label]) 
Example #2
Source File: tf_utils.py    From recommender-tensorflow with MIT License
def tf_csv_dataset(csv_path, label_col, col_defaults, shuffle=False, batch_size=32):
    df = dd.read_csv(csv_path)
    # use col_defaults if specified for col, else use defaults based on col type
    type_defaults = {np.int64: 0, np.float64: 0.0, np.object_: ""}
    record_defaults = [[col_defaults.get(col_name, type_defaults.get(col_type.type, ""))]
                       for col_name, col_type in df.dtypes.items()]

    def parse_csv(value):
        columns = tf.decode_csv(value, record_defaults)
        features = dict(zip(df.columns.tolist(), columns))
        label = features[label_col]
        return features, label

    # read, parse, shuffle and batch dataset
    dataset = tf.data.TextLineDataset(csv_path).skip(1)  # skip header
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1024)
    dataset = dataset.map(parse_csv, num_parallel_calls=8)
    dataset = dataset.batch(batch_size)
    return dataset 
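A hypothetical call for context (the file name and column names below are placeholders, not from the original project):

dataset = tf_csv_dataset("ratings.csv", label_col="rating",
                         col_defaults={"rating": 0.0}, shuffle=True)
features, label = dataset.make_one_shot_iterator().get_next()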
Example #3
Source File: input.py    From cloudml-samples with Apache License 2.0
def _decode_csv(line):
    """Takes the string input tensor and returns a dict of rank-2 tensors."""

    # Takes a rank-1 tensor and converts it into a rank-2 tensor.
    # Example: if the data is ['csv,line,1', 'csv,line,2', ...], it becomes
    # [['csv,line,1'], ['csv,line,2']], which after parsing yields a
    # tuple of tensors: [['csv'], ['csv']], [['line'], ['line']], [[1], [2]]
    row_columns = tf.expand_dims(line, -1)
    columns = tf.decode_csv(
        row_columns, record_defaults=constants.CSV_COLUMN_DEFAULTS)
    features = dict(zip(constants.CSV_COLUMNS, columns))

    # Remove unused columns
    unused_columns = set(constants.CSV_COLUMNS) - {col.name for col in
                                                   featurizer.INPUT_COLUMNS} - {
                         constants.LABEL_COLUMN}
    for col in unused_columns:
        features.pop(col)
    return features 
Example #4
Source File: inputs.py    From professional-services with Apache License 2.0
def parse_csv(record):
    """Parses columns from comma separated record.

    Defines default values and column names for columns.

    Args:
        record: String representation of the record.

    Returns:
        A dictionary with all column names and values for the record.
    """
    distribution_defaults = [[0.0] for _ in range(constants.DISTRIBUTION_SIZE)]
    weather_defaults = [[0.0] for _ in range(constants.WEATHER_SIZE)]
    distribution_cols = ['distribution' +
                         str(i) for i in range(constants.DISTRIBUTION_SIZE)]
    weather_cols = ['weather' + str(i) for i in range(constants.WEATHER_SIZE)]
    header_def = [[0.0], [''], [0], [0]] + \
        distribution_defaults + weather_defaults
    column_names = [TARGET_COLUMN, 'date', 'day',
                    'hour'] + distribution_cols + weather_cols
    columns = tf.decode_csv(record, record_defaults=header_def)
    return dict(zip(column_names, columns)) 
Example #5
Source File: ml_100k.py    From recommender-tensorflow with MIT License
def get_input_fn(csv_path, mode=tf.estimator.ModeKeys.TRAIN, batch_size=32, cutoff=5):
    def input_fn():
        def parse_csv(value):
            columns = tf.decode_csv(value, DEFAULTS)
            features = dict(zip(COLUMNS, columns))
            label = features.pop(LABEL_COL)
            label = tf.math.greater_equal(label, cutoff)
            return features, label

        # read, parse, shuffle and batch dataset
        dataset = tf.data.TextLineDataset(csv_path).skip(1)  # skip header
        if mode == tf.estimator.ModeKeys.TRAIN:
            # shuffle and repeat
            dataset = dataset.shuffle(16 * batch_size).repeat()

        dataset = dataset.map(parse_csv, num_parallel_calls=8)
        dataset = dataset.batch(batch_size)
        return dataset

    return input_fn 
Example #6
Source File: blog_custom_estimators.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def my_input_fn(file_path, repeat_count=1, shuffle_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
        .skip(1)  # Skip header row
        .map(decode_csv, num_parallel_calls=4)  # Decode each line
        .cache() # Warning: caches the entire dataset, which can cause out-of-memory errors
        .shuffle(shuffle_count)  # Randomize elems (1 == no operation)
        .repeat(repeat_count)    # Repeats dataset this # times
        .batch(32)
        .prefetch(1)  # Make sure you always have 1 batch ready to serve
    )
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #7
Source File: blog_estimators_dataset.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #8
Source File: train_higgs.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def _make_csv_serving_input_receiver_fn(column_names, column_defaults):
  """Returns serving_input_receiver_fn for csv.

  The input arguments are relevant to `tf.decode_csv()`.

  Args:
    column_names: a list of column names in the order within input csv.
    column_defaults: a list of default values with the same size as
        column_names. Each entry must be either a list of one scalar, or an
        empty list to denote that the corresponding column is required.
        e.g. [[""], [2.5], []] indicates the third column is required while
            the first column must be string and the second must be float/double.

  Returns:
    a serving_input_receiver_fn that handles csv for serving.
  """
  def serving_input_receiver_fn():
    csv = tf.placeholder(dtype=tf.string, shape=[None], name="csv")
    features = dict(zip(column_names, tf.decode_csv(csv, column_defaults)))
    receiver_tensors = {"inputs": csv}
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

  return serving_input_receiver_fn 
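For context, a hedged usage sketch; the column names and defaults below simply mirror the docstring's example rather than the project's real schema:

serving_input_receiver_fn = _make_csv_serving_input_receiver_fn(
    column_names=["name", "weight", "label"],
    column_defaults=[[""], [2.5], []])  # empty list: third column is required
# Typically handed to an Estimator export, e.g.:
# estimator.export_savedmodel(export_dir, serving_input_receiver_fn)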
Example #9
Source File: datasets.py    From Machine-Learning-with-TensorFlow-1.x with MIT License
def load_files(filename_queue):
    """
    Read and parse examples from data files.

    Args:
        filename_queue: a queue of strings; the filenames to read from.

    Returns:
        image: a [height, width, depth] uint8 Tensor with the image data.
        label: an int32 Tensor.
    """

    line_reader = tf.TextLineReader()
    key, line = line_reader.read(filename_queue)
    label, image_path = tf.decode_csv(records=line,
                                      record_defaults=[tf.constant([], dtype=tf.int32), tf.constant([], dtype=tf.string)],
                                      field_delim=' ')
    file_contents = tf.read_file(image_path)
    image = tf.image.decode_jpeg(file_contents, channels=3)

    return image, label 
Example #10
Source File: inputs.py    From professional-services with Apache License 2.0
def _parse_csv(record):
    """Parses columns from comma separated record.

    Defines types and column names for columns.

    Args:
        record: A Tensor of type string. Each string is a record/row in the csv
        and all records should have the same format.

    Returns:
        A dictionary with all column names and values for the record.
    """
    column_defaults = [
        tf.constant([], tf.string),
        tf.constant([], tf.string),
        tf.constant([], tf.int32)]
    column_names = ['img_file', 'subspecies', TARGET_COLUMN]
    columns = tf.decode_csv(record, record_defaults=column_defaults)
    return dict(zip(column_names, columns)) 
Example #11
Source File: utils.py    From CausE with Apache License 2.0
def load_train_dataset(dataset_location, batch_size, num_epochs):
    """Load the training data using TF Dataset API"""

    with tf.name_scope('train_dataset_loading'):

        record_defaults = [[1], [1], [0.]] # Sets the type of the resulting tensors and default values
        # Dataset is in the format - UserID ProductID Rating
        dataset = tf.data.TextLineDataset(dataset_location).map(lambda line: tf.decode_csv(line, record_defaults=record_defaults))
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(5)
        dataset = dataset.cache()
        dataset = dataset.repeat(num_epochs)
        iterator = dataset.make_one_shot_iterator()
        user_batch, product_batch, label_batch = iterator.get_next()
        label_batch = tf.expand_dims(label_batch, 1)

    return user_batch, product_batch, label_batch 
Example #12
Source File: dense_classifier.py    From tensorflow_template_application with Apache License 2.0
def parse_csv_function(line):
  """
  Decode one CSV line for a Dataset.

  Args:
    line: One line of the CSV file.

  Returns:
    A tuple of the feature and label tensors.
  """

  FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0],
                    [0.0], [0]]

  fields = tf.decode_csv(line, FIELD_DEFAULTS)

  label = fields[-1]
  label = tf.cast(label, tf.int64)
  features = tf.stack(fields[0:-1])

  return features, label 
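A sketch of how parse_csv_function would typically be wired into a tf.data pipeline ("train.csv" is a placeholder path):

dataset = tf.data.TextLineDataset("train.csv")
dataset = dataset.map(parse_csv_function).batch(32)
features, label = dataset.make_one_shot_iterator().get_next()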
Example #13
Source File: blog_custom_estimators.py    From yolo_v2 with Apache License 2.0
def my_input_fn(file_path, repeat_count=1, shuffle_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
        .skip(1)  # Skip header row
        .map(decode_csv, num_parallel_calls=4)  # Decode each line
        .cache() # Warning: caches the entire dataset, which can cause out-of-memory errors
        .shuffle(shuffle_count)  # Randomize elems (1 == no operation)
        .repeat(repeat_count)    # Repeats dataset this # times
        .batch(32)
        .prefetch(1)  # Make sure you always have 1 batch ready to serve
    )
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #14
Source File: wide_deep.py    From deep_learning with MIT License
def input_fn(data_file, num_epochs, shuffle, batch_size):
    """
    输入函数
    """
    assert tf.gfile.Exists(data_file), ('%s 文件没找到' % data_file)

    def parse_csv(value):
        columns = tf.decode_csv(value, record_defaults=CSV_COLUMN_DEFAULTS)
        features = dict(zip(CSV_COLUMNS, columns))
        labels = features.pop('income_bracket')
        return features, tf.equal(labels, '>50K')

    dataset = tf.data.TextLineDataset(data_file)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=NUM_EXAMPLES['train'])

    dataset = dataset.map(parse_csv, num_parallel_calls=5)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    return dataset 
Example #15
Source File: blog_estimators_dataset.py    From Gun-Detector with Apache License 2.0
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #16
Source File: blog_custom_estimators.py    From Gun-Detector with Apache License 2.0
def my_input_fn(file_path, repeat_count=1, shuffle_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
        .skip(1)  # Skip header row
        .map(decode_csv, num_parallel_calls=4)  # Decode each line
        .cache() # Warning: caches the entire dataset, which can cause out-of-memory errors
        .shuffle(shuffle_count)  # Randomize elems (1 == no operation)
        .repeat(repeat_count)    # Repeats dataset this # times
        .batch(32)
        .prefetch(1)  # Make sure you always have 1 batch ready to serve
    )
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #17
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _voc_seg_load_file(path, epochs=None, shuffle=True, seed=0):

    PASCAL_ROOT = os.environ['VOC_DIR']
    filename_queue = tf.train.string_input_producer([path],
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    image_path, seg_path = tf.decode_csv(value, record_defaults=[[''], ['']], field_delim=' ')

    image_abspath = PASCAL_ROOT + image_path
    seg_abspath = PASCAL_ROOT + seg_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]
    imgname = image_path

    seg_content = tf.read_file(seg_abspath)
    seg = tf.cast(tf.image.decode_png(seg_content, channels=1), tf.int32)
    return image, seg, imgshape, imgname 
Example #18
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _imagenet_load_file(path, epochs=None, shuffle=True, seed=0, subset='train', prepare_path=True):
    IMAGENET_ROOT = os.environ.get('IMAGENET_DIR', '')
    if not isinstance(path, list):
        path = [path]
    filename_queue = tf.train.string_input_producer(path,
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    image_path, label_str = tf.decode_csv(value, record_defaults=[[''], ['']], field_delim=' ')

    if prepare_path:
        image_abspath = IMAGENET_ROOT + '/images/' + subset + image_path
    else:
        image_abspath = image_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]
    label = tf.string_to_number(label_str, out_type=tf.int32)

    return image, label, imgshape, image_path 
Example #19
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _relpath_no_label_load_file(path, root_path, epochs=None, shuffle=True, seed=0):
    filename_queue = tf.train.string_input_producer([path],
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    #image_path, = tf.decode_csv(value, record_defaults=[['']], field_delim=' ')
    image_path = value

    image_abspath = root_path + '/' + image_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]

    return image, imgshape, image_path 
Example #20
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _abspath_no_label_load_file(path, epochs=None, shuffle=True, seed=0):
    filename_queue = tf.train.string_input_producer([path],
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    #image_path, = tf.decode_csv(value, record_defaults=[['']], field_delim=' ')
    image_path = value

    image_abspath = image_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]

    return image, imgshape, image_path 
Example #21
Source File: decode_csv_op_test.py    From deep_image_model with Apache License 2.0
def _test(self, args, expected_out=None, expected_err_re=None):
    with self.test_session() as sess:
      decode = tf.decode_csv(**args)

      if expected_err_re is None:
        out = sess.run(decode)

        for i, field in enumerate(out):
          if field.dtype == np.float32:
            self.assertAllClose(field, expected_out[i])
          else:
            self.assertAllEqual(field, expected_out[i])

      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(decode) 
Example #22
Source File: supervisor_test.py    From deep_image_model with Apache License 2.0
def testManagedEndOfInputOneQueue(self):
    # Tests that the supervisor finishes without an error when using
    # a fixed number of epochs, reading from a single queue.
    logdir = _test_dir("managed_end_of_input_one_queue")
    os.makedirs(logdir)
    data_path = self._csv_data(logdir)
    with tf.Graph().as_default():
      # Create an input pipeline that reads the file 3 times.
      filename_queue = tf.train.string_input_producer([data_path], num_epochs=3)
      reader = tf.TextLineReader()
      _, csv = reader.read(filename_queue)
      rec = tf.decode_csv(csv, record_defaults=[[1], [1], [1]])
      sv = tf.train.Supervisor(logdir=logdir)
      with sv.managed_session("") as sess:
        while not sv.should_stop():
          sess.run(rec) 
Example #23
Source File: supervisor_test.py    From deep_image_model with Apache License 2.0
def testManagedEndOfInputTwoQueues(self):
    # Tests that the supervisor finishes without an error when using
    # a fixed number of epochs, reading from two queues, the second
    # one producing a batch from the first one.
    logdir = _test_dir("managed_end_of_input_two_queues")
    os.makedirs(logdir)
    data_path = self._csv_data(logdir)
    with tf.Graph().as_default():
      # Create an input pipeline that reads the file 3 times.
      filename_queue = tf.train.string_input_producer([data_path], num_epochs=3)
      reader = tf.TextLineReader()
      _, csv = reader.read(filename_queue)
      rec = tf.decode_csv(csv, record_defaults=[[1], [1], [1]])
      shuff_rec = tf.train.shuffle_batch(rec, 1, 6, 4)
      sv = tf.train.Supervisor(logdir=logdir)
      with sv.managed_session("") as sess:
        while not sv.should_stop():
          sess.run(shuff_rec) 
Example #24
Source File: supervisor_test.py    From deep_image_model with Apache License 2.0
def testManagedMainErrorTwoQueues(self):
    # Tests that the supervisor correctly raises a main loop
    # error even when using multiple queues for input.
    logdir = _test_dir("managed_main_error_two_queues")
    os.makedirs(logdir)
    data_path = self._csv_data(logdir)
    with self.assertRaisesRegexp(RuntimeError, "fail at step 3"):
      with tf.Graph().as_default():
        # Create an input pipeline that reads the file 3 times.
        filename_queue = tf.train.string_input_producer([data_path],
                                                        num_epochs=3)
        reader = tf.TextLineReader()
        _, csv = reader.read(filename_queue)
        rec = tf.decode_csv(csv, record_defaults=[[1], [1], [1]])
        shuff_rec = tf.train.shuffle_batch(rec, 1, 6, 4)
        sv = tf.train.Supervisor(logdir=logdir)
        with sv.managed_session("") as sess:
          for step in range(9):
            if sv.should_stop():
              break
            elif step == 3:
              raise RuntimeError("fail at step 3")
            else:
              sess.run(shuff_rec) 
Example #25
Source File: dataset.py    From wide_deep with MIT License
def _column_to_csv_defaults(self):
        """parse columns to record_defaults param in tf.decode_csv func
        Return: 
            OrderedDict {'feature name': [''],...}
        """
        csv_defaults = OrderedDict()
        csv_defaults['label'] = [0]  # first label default, empty if the field is must
        for f in self._feature:
            if f in self._feature_conf:  # used features
                conf = self._feature_conf[f]
                if conf['type'] == 'category':
                    if conf['transform'] == 'identity':  # identity category column need int type
                        csv_defaults[f] = [0]
                    else:
                        csv_defaults[f] = ['']
                else:
                    csv_defaults[f] = [0.0]  # 0.0 for float32
            else:  # unused features
                csv_defaults[f] = ['']
        return csv_defaults 
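A sketch of how the returned OrderedDict splits into the column names and the record_defaults argument of tf.decode_csv (the parse helper below is hypothetical, not part of the original class):

def _parse_line(line, csv_defaults):
    # Keys supply the column names, values supply record_defaults, in order.
    columns = tf.decode_csv(line, record_defaults=list(csv_defaults.values()))
    features = dict(zip(csv_defaults.keys(), columns))
    label = features.pop('label')
    return features, label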
Example #26
Source File: clock_data.py    From deep-time-reading with MIT License
def read_image_and_label(image_label_q):
    # Returns three Tensors: the decoded PNG image, the hour, and the minute.
    filename, hour_str, minute_str = tf.decode_csv(
        image_label_q.dequeue(), [[""], [""], [""]], " ")
    file_contents = tf.read_file(filename)

    # Decode image from PNG, and cast it to a float.
    example = tf.image.decode_png(file_contents, channels=image_channels)
    image = tf.cast(example, tf.float32)

    # Set the tensor size manually from the image.
    image.set_shape([image_size, image_size, image_channels])

    # Do per-image whitening (zero mean, unit standard deviation). Without this,
    # the learning algorithm diverges almost immediately because the gradient is
    # too big.
    image = tf.image.per_image_whitening(image)

    # The label should be an integer.
    hour = tf.string_to_number(hour_str, out_type=tf.int32)
    minute = tf.string_to_number(minute_str, out_type=tf.int32)

    return image, hour, minute 
Example #27
Source File: PASCALVOC2012Localization.py    From dynamic-training-bench with Mozilla Public License 2.0
def _read_image_and_box(self, bboxes_csv):
        """Extract the filename from the queue, read the image and
        produce a single box
        Returns:
            image, [y_min, x_min, y_max, x_max, label]
        """

        reader = tf.TextLineReader(skip_header_lines=True)
        _, row = reader.read(bboxes_csv)
        # file, y_min, x_min, y_max, x_max, label
        record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
        # eg:
        # 2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0
        filename, y_min, x_min, y_max, x_max, label = tf.decode_csv(
            row, record_defaults)
        image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                                  'JPEGImages') + "/" + filename + ".jpg"

        # image is normalized in [-1,1]
        image = read_image_jpg(image_path)
        return image, tf.stack([y_min, x_min, y_max, x_max, label]) 
Example #28
Source File: blog_estimators_dataset.py    From yolo_v2 with Apache License 2.0
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #29
Source File: model.py    From models with Apache License 2.0
def parse_csv(rows_string_tensor):
  """Takes the string input tensor and returns a dict of rank-2 tensors."""
  columns = tf.decode_csv(
      rows_string_tensor, record_defaults=CSV_COLUMN_DEFAULTS)
  features = dict(zip(CSV_COLUMNS, columns))

  # Remove unused columns
  for col in UNUSED_COLUMNS:
    features.pop(col)

  for key, value in six.iteritems(features):
    features[key] = tf.expand_dims(features[key], -1)
  return features 
Example #30
Source File: model.py    From code-snippets with Apache License 2.0
def parse_csv(rows_string_tensor):
  """Takes the string input tensor and returns a dict of rank-2 tensors."""

  # Takes a rank-1 tensor and converts it into a rank-2 tensor.
  # Example: if the data is ['csv,line,1', 'csv,line,2', ...], it becomes
  # [['csv,line,1'], ['csv,line,2']], which after parsing yields a
  # tuple of tensors: [['csv'], ['csv']], [['line'], ['line']], [[1], [2]]
  row_columns = tf.expand_dims(rows_string_tensor, -1)
  columns = tf.decode_csv(row_columns, record_defaults=CSV_COLUMN_DEFAULTS)
  features = dict(zip(CSV_COLUMNS, columns))

  # Remove unused columns
  for col in UNUSED_COLUMNS:
    features.pop(col)
  return features