Python tensorflow.decode_csv() Examples

The following are 30 code examples of tensorflow.decode_csv(), collected from open-source projects. The source file and project for each example are noted above it. You may also want to check out all available functions and classes of the tensorflow module. (In TensorFlow 1.x this op is available as tf.decode_csv; in 2.x it lives at tf.io.decode_csv.)
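Before the examples, a minimal self-contained sketch of the call may help (TF 1.x graph mode; the record contents here are invented for illustration). Each entry of record_defaults fixes the dtype of the corresponding output column, and an empty list would mark that column as required:

import tensorflow as tf

# One CSV record and per-column defaults; each default's dtype determines
# the dtype of the decoded column (string, int32, float32 here).
record = tf.constant("alice,32,1.75")
name, age, height = tf.decode_csv(record, record_defaults=[[""], [0], [0.0]])

with tf.Session() as sess:
    print(sess.run([name, age, height]))  # [b'alice', 32, 1.75]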
Example #1
Source File: PASCALVOC2012Classification.py    From dynamic-training-bench with Mozilla Public License 2.0
def _read_image_and_box(self, bboxes_csv):
        """Extract the filename from the queue, read the image and
        produce a single box
        Returns:
            image, box
        """

        reader = tf.TextLineReader(skip_header_lines=True)
        _, row = reader.read(bboxes_csv)
        # file, y_min, x_min, y_max, x_max, label
        record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
        # eg:
        # 2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0
        filename, y_min, x_min, y_max, x_max, label = tf.decode_csv(
            row, record_defaults)
        image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                                  'JPEGImages') + "/" + filename + ".jpg"

        # image is normalized in [-1,1] and converted to self._image_depth channels
        image = read_image_jpg(image_path, depth=self._image_depth)
        return image, tf.stack([y_min, x_min, y_max, x_max, label]) 
Example #2
Source File: tf_utils.py    From recommender-tensorflow with MIT License
def tf_csv_dataset(csv_path, label_col, col_defaults, shuffle=False, batch_size=32):
    df = dd.read_csv(csv_path)
    # use col_defaults if specified for col, else use defaults based on col type
    type_defaults = {np.int64: 0, np.float64: 0.0, np.object_: ""}
    record_defaults = [[col_defaults.get(col_name, type_defaults.get(col_type.type, ""))]
                       for col_name, col_type in df.dtypes.items()]

    def parse_csv(value):
        columns = tf.decode_csv(value, record_defaults)
        features = dict(zip(df.columns.tolist(), columns))
        label = features[label_col]
        return features, label

    # read, parse, shuffle and batch dataset
    dataset = tf.data.TextLineDataset(csv_path).skip(1)  # skip header
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1024)
    dataset = dataset.map(parse_csv, num_parallel_calls=8)
    dataset = dataset.batch(batch_size)
    return dataset 
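A hypothetical call for context (the file name and column names below are placeholders, not from the original project):

dataset = tf_csv_dataset("ratings.csv", label_col="rating",
                         col_defaults={"rating": 0.0}, shuffle=True)
features, label = dataset.make_one_shot_iterator().get_next()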
Example #3
Source File: input.py    From cloudml-samples with Apache License 2.0
def _decode_csv(line):
    """Takes the string input tensor and returns a dict of rank-2 tensors."""

    # Takes a rank-1 tensor and converts it into a rank-2 tensor.
    # Example: if the data is ['csv,line,1', 'csv,line,2', ...], it becomes
    # [['csv,line,1'], ['csv,line,2']], which after parsing yields a
    # tuple of tensors: [['csv'], ['csv']], [['line'], ['line']], [[1], [2]]
    row_columns = tf.expand_dims(line, -1)
    columns = tf.decode_csv(
        row_columns, record_defaults=constants.CSV_COLUMN_DEFAULTS)
    features = dict(zip(constants.CSV_COLUMNS, columns))

    # Remove unused columns
    unused_columns = set(constants.CSV_COLUMNS) - {col.name for col in
                                                   featurizer.INPUT_COLUMNS} - {
                         constants.LABEL_COLUMN}
    for col in unused_columns:
        features.pop(col)
    return features 
Example #4
Source File: inputs.py    From professional-services with Apache License 2.0
def parse_csv(record):
    """Parses columns from comma separated record.

    Defines default values and column names for columns.

    Args:
        record: String representation of the record.

    Returns:
        A dictionary with all column names and values for the record.
    """
    distribution_defaults = [[0.0] for _ in range(constants.DISTRIBUTION_SIZE)]
    weather_defaults = [[0.0] for _ in range(constants.WEATHER_SIZE)]
    distribution_cols = ['distribution' +
                         str(i) for i in range(constants.DISTRIBUTION_SIZE)]
    weather_cols = ['weather' + str(i) for i in range(constants.WEATHER_SIZE)]
    header_def = [[0.0], [''], [0], [0]] + \
        distribution_defaults + weather_defaults
    column_names = [TARGET_COLUMN, 'date', 'day',
                    'hour'] + distribution_cols + weather_cols
    columns = tf.decode_csv(record, record_defaults=header_def)
    return dict(zip(column_names, columns)) 
Example #5
Source File: ml_100k.py    From recommender-tensorflow with MIT License
def get_input_fn(csv_path, mode=tf.estimator.ModeKeys.TRAIN, batch_size=32, cutoff=5):
    def input_fn():
        def parse_csv(value):
            columns = tf.decode_csv(value, DEFAULTS)
            features = dict(zip(COLUMNS, columns))
            label = features.pop(LABEL_COL)
            label = tf.math.greater_equal(label, cutoff)
            return features, label

        # read, parse, shuffle and batch dataset
        dataset = tf.data.TextLineDataset(csv_path).skip(1)  # skip header
        if mode == tf.estimator.ModeKeys.TRAIN:
            # shuffle and repeat
            dataset = dataset.shuffle(16 * batch_size).repeat()

        dataset = dataset.map(parse_csv, num_parallel_calls=8)
        dataset = dataset.batch(batch_size)
        return dataset

    return input_fn 
Example #6
Source File: blog_custom_estimators.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def my_input_fn(file_path, repeat_count=1, shuffle_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
        .skip(1)  # Skip header row
        .map(decode_csv, num_parallel_calls=4)  # Decode each line
        .cache() # Warning: caches the entire dataset, which can cause out-of-memory errors
        .shuffle(shuffle_count)  # Randomize elems (1 == no operation)
        .repeat(repeat_count)    # Repeats dataset this # times
        .batch(32)
        .prefetch(1)  # Make sure you always have 1 batch ready to serve
    )
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #7
Source File: blog_estimators_dataset.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #8
Source File: train_higgs.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def _make_csv_serving_input_receiver_fn(column_names, column_defaults):
  """Returns serving_input_receiver_fn for csv.

  The input arguments are relevant to `tf.decode_csv()`.

  Args:
    column_names: a list of column names in the order within input csv.
    column_defaults: a list of default values with the same size as
        column_names. Each entry must be either a list of one scalar, or an
        empty list to denote that the corresponding column is required.
        e.g. [[""], [2.5], []] indicates the third column is required while
            the first column must be string and the second must be float/double.

  Returns:
    a serving_input_receiver_fn that handles csv for serving.
  """
  def serving_input_receiver_fn():
    csv = tf.placeholder(dtype=tf.string, shape=[None], name="csv")
    features = dict(zip(column_names, tf.decode_csv(csv, column_defaults)))
    receiver_tensors = {"inputs": csv}
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

  return serving_input_receiver_fn 
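For context, a hedged usage sketch; the column names and defaults below simply mirror the docstring's example rather than the project's real schema:

serving_input_receiver_fn = _make_csv_serving_input_receiver_fn(
    column_names=["name", "weight", "label"],
    column_defaults=[[""], [2.5], []])  # empty list: third column is required
# Typically handed to an Estimator export, e.g.:
# estimator.export_savedmodel(export_dir, serving_input_receiver_fn)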
Example #9
Source File: datasets.py    From Machine-Learning-with-TensorFlow-1.x with MIT License
def load_files(filename_queue):
    """
    Read and parse examples from data files.

    Args:
        filename_queue: a queue of strings; the filenames to read from.

    Returns:
        image: a [height, width, depth] uint8 Tensor with the image data.
        label: an int32 Tensor.
    """

    line_reader = tf.TextLineReader()
    key, line = line_reader.read(filename_queue)
    label, image_path = tf.decode_csv(records=line,
                                      record_defaults=[tf.constant([], dtype=tf.int32), tf.constant([], dtype=tf.string)],
                                      field_delim=' ')
    file_contents = tf.read_file(image_path)
    image = tf.image.decode_jpeg(file_contents, channels=3)

    return image, label 
Example #10
Source File: inputs.py    From professional-services with Apache License 2.0
def _parse_csv(record):
    """Parses columns from comma separated record.

    Defines types and column names for columns.

    Args:
        record: A Tensor of type string. Each string is a record/row in the csv
        and all records should have the same format.

    Returns:
        A dictionary with all column names and values for the record.
    """
    column_defaults = [
        tf.constant([], tf.string),
        tf.constant([], tf.string),
        tf.constant([], tf.int32)]
    column_names = ['img_file', 'subspecies', TARGET_COLUMN]
    columns = tf.decode_csv(record, record_defaults=column_defaults)
    return dict(zip(column_names, columns)) 
Example #11
Source File: utils.py    From CausE with Apache License 2.0
def load_train_dataset(dataset_location, batch_size, num_epochs):
    """Load the training data using TF Dataset API"""

    with tf.name_scope('train_dataset_loading'):

        record_defaults = [[1], [1], [0.]] # Sets the type of the resulting tensors and default values
        # Dataset is in the format - UserID ProductID Rating
        dataset = tf.data.TextLineDataset(dataset_location).map(lambda line: tf.decode_csv(line, record_defaults=record_defaults))
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(5)
        dataset = dataset.cache()
        dataset = dataset.repeat(num_epochs)
        iterator = dataset.make_one_shot_iterator()
        user_batch, product_batch, label_batch = iterator.get_next()
        label_batch = tf.expand_dims(label_batch, 1)

    return user_batch, product_batch, label_batch 
Example #12
Source File: dense_classifier.py    From tensorflow_template_application with Apache License 2.0
def parse_csv_function(line):
  """
  Decode one CSV line for a Dataset.

  Args:
    line: One line of the CSV file.

  Returns:
    A tuple of the feature and label tensors.
  """

  FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0],
                    [0.0], [0]]

  fields = tf.decode_csv(line, FIELD_DEFAULTS)

  label = fields[-1]
  label = tf.cast(label, tf.int64)
  features = tf.stack(fields[0:-1])

  return features, label 
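A sketch of how parse_csv_function would typically be wired into a tf.data pipeline ("train.csv" is a placeholder path):

dataset = tf.data.TextLineDataset("train.csv")
dataset = dataset.map(parse_csv_function).batch(32)
features, label = dataset.make_one_shot_iterator().get_next()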
Example #13
Source File: blog_custom_estimators.py    From yolo_v2 with Apache License 2.0
def my_input_fn(file_path, repeat_count=1, shuffle_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
        .skip(1)  # Skip header row
        .map(decode_csv, num_parallel_calls=4)  # Decode each line
        .cache() # Warning: caches the entire dataset, which can cause out-of-memory errors
        .shuffle(shuffle_count)  # Randomize elems (1 == no operation)
        .repeat(repeat_count)    # Repeats dataset this # times
        .batch(32)
        .prefetch(1)  # Make sure you always have 1 batch ready to serve
    )
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #14
Source File: wide_deep.py    From deep_learning with MIT License
def input_fn(data_file, num_epochs, shuffle, batch_size):
    """
    输入函数
    """
    assert tf.gfile.Exists(data_file), ('%s 文件没找到' % data_file)

    def parse_csv(value):
        columns = tf.decode_csv(value, record_defaults=CSV_COLUMN_DEFAULTS)
        features = dict(zip(CSV_COLUMNS, columns))
        labels = features.pop('income_bracket')
        return features, tf.equal(labels, '>50K')

    dataset = tf.data.TextLineDataset(data_file)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=NUM_EXAMPLES['train'])

    dataset = dataset.map(parse_csv, num_parallel_calls=5)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    return dataset 
Example #15
Source File: blog_estimators_dataset.py    From Gun-Detector with Apache License 2.0
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #16
Source File: blog_custom_estimators.py    From Gun-Detector with Apache License 2.0
def my_input_fn(file_path, repeat_count=1, shuffle_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
        .skip(1)  # Skip header row
        .map(decode_csv, num_parallel_calls=4)  # Decode each line
        .cache() # Warning: caches the entire dataset, which can cause out-of-memory errors
        .shuffle(shuffle_count)  # Randomize elems (1 == no operation)
        .repeat(repeat_count)    # Repeats dataset this # times
        .batch(32)
        .prefetch(1)  # Make sure you always have 1 batch ready to serve
    )
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #17
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _voc_seg_load_file(path, epochs=None, shuffle=True, seed=0):

    PASCAL_ROOT = os.environ['VOC_DIR']
    filename_queue = tf.train.string_input_producer([path],
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    image_path, seg_path = tf.decode_csv(value, record_defaults=[[''], ['']], field_delim=' ')

    image_abspath = PASCAL_ROOT + image_path
    seg_abspath = PASCAL_ROOT + seg_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]
    imgname = image_path

    seg_content = tf.read_file(seg_abspath)
    seg = tf.cast(tf.image.decode_png(seg_content, channels=1), tf.int32)
    return image, seg, imgshape, imgname 
Example #18
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _imagenet_load_file(path, epochs=None, shuffle=True, seed=0, subset='train', prepare_path=True):
    IMAGENET_ROOT = os.environ.get('IMAGENET_DIR', '')
    if not isinstance(path, list):
        path = [path]
    filename_queue = tf.train.string_input_producer(path,
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    image_path, label_str = tf.decode_csv(value, record_defaults=[[''], ['']], field_delim=' ')

    if prepare_path:
        image_abspath = IMAGENET_ROOT + '/images/' + subset + image_path
    else:
        image_abspath = image_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]
    label = tf.string_to_number(label_str, out_type=tf.int32)

    return image, label, imgshape, image_path 
Example #19
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _relpath_no_label_load_file(path, root_path, epochs=None, shuffle=True, seed=0):
    filename_queue = tf.train.string_input_producer([path],
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    #image_path, = tf.decode_csv(value, record_defaults=[['']], field_delim=' ')
    image_path = value

    image_abspath = root_path + '/' + image_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]

    return image, imgshape, image_path 
Example #20
Source File: datasets.py    From self-supervision with BSD 3-Clause "New" or "Revised" License
def _abspath_no_label_load_file(path, epochs=None, shuffle=True, seed=0):
    filename_queue = tf.train.string_input_producer([path],
            num_epochs=epochs, shuffle=shuffle, seed=seed)

    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    #image_path, = tf.decode_csv(value, record_defaults=[['']], field_delim=' ')
    image_path = value

    image_abspath = image_path

    image_content = tf.read_file(image_abspath)
    image = decode_image(image_content, channels=3)
    image.set_shape([None, None, 3])

    imgshape = tf.shape(image)[:2]

    return image, imgshape, image_path 
Example #21
Source File: decode_csv_op_test.py    From deep_image_model with Apache License 2.0
def _test(self, args, expected_out=None, expected_err_re=None):
    with self.test_session() as sess:
      decode = tf.decode_csv(**args)

      if expected_err_re is None:
        out = sess.run(decode)

        for i, field in enumerate(out):
          if field.dtype == np.float32:
            self.assertAllClose(field, expected_out[i])
          else:
            self.assertAllEqual(field, expected_out[i])

      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(decode) 
Example #22
Source File: supervisor_test.py    From deep_image_model with Apache License 2.0
def testManagedEndOfInputOneQueue(self):
    # Tests that the supervisor finishes without an error when using
    # a fixed number of epochs, reading from a single queue.
    logdir = _test_dir("managed_end_of_input_one_queue")
    os.makedirs(logdir)
    data_path = self._csv_data(logdir)
    with tf.Graph().as_default():
      # Create an input pipeline that reads the file 3 times.
      filename_queue = tf.train.string_input_producer([data_path], num_epochs=3)
      reader = tf.TextLineReader()
      _, csv = reader.read(filename_queue)
      rec = tf.decode_csv(csv, record_defaults=[[1], [1], [1]])
      sv = tf.train.Supervisor(logdir=logdir)
      with sv.managed_session("") as sess:
        while not sv.should_stop():
          sess.run(rec) 
Example #23
Source File: supervisor_test.py    From deep_image_model with Apache License 2.0
def testManagedEndOfInputTwoQueues(self):
    # Tests that the supervisor finishes without an error when using
    # a fixed number of epochs, reading from two queues, the second
    # one producing a batch from the first one.
    logdir = _test_dir("managed_end_of_input_two_queues")
    os.makedirs(logdir)
    data_path = self._csv_data(logdir)
    with tf.Graph().as_default():
      # Create an input pipeline that reads the file 3 times.
      filename_queue = tf.train.string_input_producer([data_path], num_epochs=3)
      reader = tf.TextLineReader()
      _, csv = reader.read(filename_queue)
      rec = tf.decode_csv(csv, record_defaults=[[1], [1], [1]])
      shuff_rec = tf.train.shuffle_batch(rec, 1, 6, 4)
      sv = tf.train.Supervisor(logdir=logdir)
      with sv.managed_session("") as sess:
        while not sv.should_stop():
          sess.run(shuff_rec) 
Example #24
Source File: supervisor_test.py    From deep_image_model with Apache License 2.0
def testManagedMainErrorTwoQueues(self):
    # Tests that the supervisor correctly raises a main loop
    # error even when using multiple queues for input.
    logdir = _test_dir("managed_main_error_two_queues")
    os.makedirs(logdir)
    data_path = self._csv_data(logdir)
    with self.assertRaisesRegexp(RuntimeError, "fail at step 3"):
      with tf.Graph().as_default():
        # Create an input pipeline that reads the file 3 times.
        filename_queue = tf.train.string_input_producer([data_path],
                                                        num_epochs=3)
        reader = tf.TextLineReader()
        _, csv = reader.read(filename_queue)
        rec = tf.decode_csv(csv, record_defaults=[[1], [1], [1]])
        shuff_rec = tf.train.shuffle_batch(rec, 1, 6, 4)
        sv = tf.train.Supervisor(logdir=logdir)
        with sv.managed_session("") as sess:
          for step in range(9):
            if sv.should_stop():
              break
            elif step == 3:
              raise RuntimeError("fail at step 3")
            else:
              sess.run(shuff_rec) 
Example #25
Source File: dataset.py    From wide_deep with MIT License
def _column_to_csv_defaults(self):
        """parse columns to record_defaults param in tf.decode_csv func
        Return: 
            OrderedDict {'feature name': [''],...}
        """
        csv_defaults = OrderedDict()
        csv_defaults['label'] = [0]  # first label default, empty if the field is must
        for f in self._feature:
            if f in self._feature_conf:  # used features
                conf = self._feature_conf[f]
                if conf['type'] == 'category':
                    if conf['transform'] == 'identity':  # identity category column need int type
                        csv_defaults[f] = [0]
                    else:
                        csv_defaults[f] = ['']
                else:
                    csv_defaults[f] = [0.0]  # 0.0 for float32
            else:  # unused features
                csv_defaults[f] = ['']
        return csv_defaults 
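A sketch of how the returned OrderedDict splits into the column names and the record_defaults argument of tf.decode_csv (the parse helper below is hypothetical, not part of the original class):

def _parse_line(line, csv_defaults):
    # Keys supply the column names, values supply record_defaults, in order.
    columns = tf.decode_csv(line, record_defaults=list(csv_defaults.values()))
    features = dict(zip(csv_defaults.keys(), columns))
    label = features.pop('label')
    return features, label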
Example #26
Source File: clock_data.py    From deep-time-reading with MIT License
def read_image_and_label(image_label_q):
    # Returns three Tensors: the decoded PNG image, the hour, and the minute.
    filename, hour_str, minute_str = tf.decode_csv(
        image_label_q.dequeue(), [[""], [""], [""]], " ")
    file_contents = tf.read_file(filename)

    # Decode image from PNG, and cast it to a float.
    example = tf.image.decode_png(file_contents, channels=image_channels)
    image = tf.cast(example, tf.float32)

    # Set the tensor size manually from the image.
    image.set_shape([image_size, image_size, image_channels])

    # Do per-image whitening (zero mean, unit standard deviation). Without this,
    # the learning algorithm diverges almost immediately because the gradient is
    # too big.
    image = tf.image.per_image_whitening(image)

    # The label should be an integer.
    hour = tf.string_to_number(hour_str, out_type=tf.int32)
    minute = tf.string_to_number(minute_str, out_type=tf.int32)

    return image, hour, minute 
Example #27
Source File: PASCALVOC2012Localization.py    From dynamic-training-bench with Mozilla Public License 2.0
def _read_image_and_box(self, bboxes_csv):
        """Extract the filename from the queue, read the image and
        produce a single box
        Returns:
            image, [y_min, x_min, y_max, x_max, label]
        """

        reader = tf.TextLineReader(skip_header_lines=True)
        _, row = reader.read(bboxes_csv)
        # file, y_min, x_min, y_max, x_max, label
        record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]]
        # eg:
        # 2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0
        filename, y_min, x_min, y_max, x_max, label = tf.decode_csv(
            row, record_defaults)
        image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012',
                                  'JPEGImages') + "/" + filename + ".jpg"

        # image is normalized in [-1,1]
        image = read_image_jpg(image_path)
        return image, tf.stack([y_min, x_min, y_max, x_max, label]) 
Example #28
Source File: blog_estimators_dataset.py    From yolo_v2 with Apache License 2.0
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
    def decode_csv(line):
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = parsed_line[-1]  # Last element is the label
        del parsed_line[-1]  # Delete last element
        features = parsed_line  # Everything but the last element is the features
        d = dict(zip(feature_names, features)), label
        return d

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(32)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels 
Example #29
Source File: model.py    From models with Apache License 2.0
def parse_csv(rows_string_tensor):
  """Takes the string input tensor and returns a dict of rank-2 tensors."""
  columns = tf.decode_csv(
      rows_string_tensor, record_defaults=CSV_COLUMN_DEFAULTS)
  features = dict(zip(CSV_COLUMNS, columns))

  # Remove unused columns
  for col in UNUSED_COLUMNS:
    features.pop(col)

  for key, value in six.iteritems(features):
    features[key] = tf.expand_dims(features[key], -1)
  return features 
Example #30
Source File: model.py    From code-snippets with Apache License 2.0
def parse_csv(rows_string_tensor):
  """Takes the string input tensor and returns a dict of rank-2 tensors."""

  # Takes a rank-1 tensor and converts it into a rank-2 tensor.
  # Example: if the data is ['csv,line,1', 'csv,line,2', ...], it becomes
  # [['csv,line,1'], ['csv,line,2']], which after parsing yields a
  # tuple of tensors: [['csv'], ['csv']], [['line'], ['line']], [[1], [2]]
  row_columns = tf.expand_dims(rows_string_tensor, -1)
  columns = tf.decode_csv(row_columns, record_defaults=CSV_COLUMN_DEFAULTS)
  features = dict(zip(CSV_COLUMNS, columns))

  # Remove unused columns
  for col in UNUSED_COLUMNS:
    features.pop(col)
  return features