Python Examples of tensorflow.FixedLenFeature

Source File: dataset.py From DNA-GAN with MIT License

7 votes

def parse_fn(self, serialized_example):
        features={
            'image/id_name': tf.FixedLenFeature([], tf.string),
            'image/height' : tf.FixedLenFeature([], tf.int64),
            'image/width'  : tf.FixedLenFeature([], tf.int64),
            'image/encoded': tf.FixedLenFeature([], tf.string),
        }
        for name in self.feature_list:
            features[name] = tf.FixedLenFeature([], tf.int64)

        example = tf.parse_single_example(serialized_example, features=features)
        image = tf.decode_raw(example['image/encoded'], tf.uint8)
        raw_height = tf.cast(example['image/height'], tf.int32)
        raw_width = tf.cast(example['image/width'], tf.int32)
        image = tf.reshape(image, [raw_height, raw_width, 3])
        image = tf.image.resize_images(image, size=[self.height, self.width])
        # from IPython import embed; embed(); exit()

        feature_val_list = [tf.cast(example[name], tf.float32) for name in self.feature_list]
        return image, feature_val_list

Source File: readers.py From Youtube-8M-WILLOW with Apache License 2.0

6 votes

def prepare_serialized_examples(self, serialized_examples):
    # set the mapping from the fields to data types in the proto
    num_features = len(self.feature_names)
    assert num_features > 0, "self.feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
    "length of feature_names (={}) != length of feature_sizes (={})".format( \
    len(self.feature_names), len(self.feature_sizes))

    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
                   "labels": tf.VarLenFeature(tf.int64)}
    for feature_index in range(num_features):
      feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
          [self.feature_sizes[feature_index]], tf.float32)

    features = tf.parse_example(serialized_examples, features=feature_map)

    labels = tf.sparse_to_indicator(features["labels"], self.num_classes)
    labels.set_shape([None, self.num_classes])
    concatenated_features = tf.concat([
        features[feature_name] for feature_name in self.feature_names], 1)

    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])

Source File: reader.py From CapsLayer with Apache License 2.0

6 votes

def parse_fun(serialized_example):
    """ Data parsing function.
    """
    features = tf.parse_single_example(serialized_example,
                                       features={'image': tf.FixedLenFeature([], tf.string),
                                                 'label': tf.FixedLenFeature([], tf.int64),
                                                 'height': tf.FixedLenFeature([], tf.int64),
                                                 'width': tf.FixedLenFeature([], tf.int64),
                                                 'depth': tf.FixedLenFeature([], tf.int64)})
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, shape=[height * width * depth])
    image.set_shape([28 * 28 * 1])
    image = tf.cast(image, tf.float32) * (1. / 255)
    label = tf.cast(features['label'], tf.int32)
    features = {'images': image, 'labels': label}
    return(features)

Source File: gym_problems.py From fine-lm with MIT License

6 votes

def extra_reading_spec(self):
    """Additional data fields to store on disk and their decoders."""

    # TODO(piotrmilos): shouldn't done be included here?
    data_fields = {
        "frame_number": tf.FixedLenFeature([1], tf.int64),
        "action": tf.FixedLenFeature([1], tf.int64),
        "reward": tf.FixedLenFeature([1], tf.int64)
    }
    decoders = {
        "frame_number":
            tf.contrib.slim.tfexample_decoder.Tensor(tensor_key="frame_number"),
        "action":
            tf.contrib.slim.tfexample_decoder.Tensor(tensor_key="action"),
        "reward":
            tf.contrib.slim.tfexample_decoder.Tensor(tensor_key="reward"),
    }
    return data_fields, decoders

Source File: read_tfrecord.py From 2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement with MIT License

6 votes

def _extract_features_batch(self, serialized_batch):
        features = tf.parse_example(
            serialized_batch,
            features={'images': tf.FixedLenFeature([], tf.string),
                'imagepaths': tf.FixedLenFeature([], tf.string),
                'labels': tf.VarLenFeature(tf.int64),
                 })

        bs = features['images'].shape[0]
        images = tf.decode_raw(features['images'], tf.uint8)
        w, h = tuple(CFG.ARCH.INPUT_SIZE)
        images = tf.cast(x=images, dtype=tf.float32)
        #images = tf.subtract(tf.divide(images, 128.0), 1.0)
        images = tf.reshape(images, [bs, h, -1, CFG.ARCH.INPUT_CHANNELS])

        labels = features['labels']
        labels = tf.cast(labels, tf.int32)

        imagepaths = features['imagepaths']

        return images, labels, imagepaths

Source File: reader.py From CapsLayer with Apache License 2.0

6 votes

def parse_fun(serialized_example):
    """ Data parsing function.
    """
    features = tf.parse_single_example(serialized_example,
                                       features={'image': tf.FixedLenFeature([], tf.string),
                                                 'label': tf.FixedLenFeature([], tf.int64),
                                                 'height': tf.FixedLenFeature([], tf.int64),
                                                 'width': tf.FixedLenFeature([], tf.int64),
                                                 'depth': tf.FixedLenFeature([], tf.int64)})
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, shape=[height * width * depth])
    image.set_shape([28 * 28 * 1])
    image = tf.cast(image, tf.float32) * (1. / 255)
    label = tf.cast(features['label'], tf.int32)
    features = {'images': image, 'labels': label}
    return(features)

Source File: train.py From centernet_tensorflow_wilderface_voc with MIT License

6 votes

def parse_color_data(example_proto):
    features = {"img_raw": tf.FixedLenFeature([], tf.string),
                "label": tf.FixedLenFeature([], tf.string),
                "width": tf.FixedLenFeature([], tf.int64),
                "height": tf.FixedLenFeature([], tf.int64)}
    parsed_features = tf.parse_single_example(example_proto, features)
    img = parsed_features["img_raw"]
    img = tf.decode_raw(img, tf.uint8)
    width = parsed_features["width"]
    height = parsed_features["height"]
    img = tf.reshape(img, [height, width, 3])
    img = tf.cast(img, tf.float32) * (1. / 255.) - 0.5
    label = parsed_features["label"]
    label = tf.decode_raw(label, tf.float32)

    return img, label

Source File: loader.py From SketchCNN with MIT License

6 votes

def _read_raw(self):
        """Read raw data from TFRecord.

        Returns:
            :return: data list [input_raw, label_raw].
        """
        self._reader = tf.TFRecordReader()

        _, serialized_example = self._reader.read(self._queue)

        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'name': tf.FixedLenFeature([], tf.string),
                                               'block': tf.FixedLenFeature([], tf.string)
                                           })

        input_raw, label_raw = decode_block(features['block'], tensor_size=self._raw_size)

        if self._with_key:
            return input_raw, label_raw, features['name']
        return input_raw, label_raw

Source File: video_utils.py From fine-lm with MIT License

6 votes

def example_reading_spec(self):
    extra_data_fields, extra_data_items_to_decoders = self.extra_reading_spec

    data_fields = {
        "image/encoded": tf.FixedLenFeature((), tf.string),
        "image/format": tf.FixedLenFeature((), tf.string),
    }
    data_fields.update(extra_data_fields)

    data_items_to_decoders = {
        "frame":
            tf.contrib.slim.tfexample_decoder.Image(
                image_key="image/encoded",
                format_key="image/format",
                shape=[self.frame_height, self.frame_width, self.num_channels],
                channels=self.num_channels),
    }
    data_items_to_decoders.update(extra_data_items_to_decoders)

    return data_fields, data_items_to_decoders

Source File: vfn_train.py From view-finding-network with GNU General Public License v3.0

6 votes

def read_and_decode_aug(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
      # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
        })

    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.image.random_flip_left_right(tf.reshape(image, [227, 227, 6]))
  # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    image = tf.image.random_brightness(image, 0.01)
    image = tf.image.random_contrast(image, 0.95, 1.05)
    return tf.split(image, 2, 2) # 3rd dimension two parts

Source File: vfn_train.py From view-finding-network with GNU General Public License v3.0

6 votes

def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
      # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
        })

    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [227, 227, 6])

  # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    return tf.split(image, 2, 2) # 3rd dimension two parts

Source File: inputs.py From ffn with Apache License 2.0

6 votes

def load_patch_coordinates_from_filename_queue(filename_queue):
  """Loads coordinates and volume names from filename queue.

  Args:
    filename_queue: Tensorflow queue created from create_filename_queue()

  Returns:
    Tuple of coordinates (shape `[1, 3]`) and volume name (shape `[1]`) tensors.
  """
  record_options = tf.python_io.TFRecordOptions(
      tf.python_io.TFRecordCompressionType.GZIP)
  keys, protos = tf.TFRecordReader(options=record_options).read(filename_queue)
  examples = tf.parse_single_example(protos, features=dict(
      center=tf.FixedLenFeature(shape=[1, 3], dtype=tf.int64),
      label_volume_name=tf.FixedLenFeature(shape=[1], dtype=tf.string),
  ))
  coord = examples['center']
  volname = examples['label_volume_name']
  return coord, volname

Source File: model.py From cloudml-dist-mnist-example with Apache License 2.0

6 votes

def read_and_decode(filename_queue):
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)

  features = tf.parse_single_example(
      serialized_example,
      features={
          'image_raw': tf.FixedLenFeature([], tf.string),
          'label': tf.FixedLenFeature([], tf.int64),
      })

  image = tf.decode_raw(features['image_raw'], tf.uint8)
  image.set_shape([784])
  image = tf.cast(image, tf.float32) * (1. / 255)
  label = tf.cast(features['label'], tf.int32)

  return image, label

Source File: tf_schema_utils_test.py From spotify-tensorflow with Apache License 2.0

5 votes

def test_round_trip(self):
        feature_spec = {
            "scalar_feature_1": tf.FixedLenFeature(shape=[], dtype=tf.int64),
            "scalar_feature_2": tf.FixedLenFeature(shape=[], dtype=tf.int64),
            "scalar_feature_3": tf.FixedLenFeature(shape=[], dtype=tf.float32),
            "varlen_feature_1": tf.VarLenFeature(dtype=tf.float32),
            "varlen_feature_2": tf.VarLenFeature(dtype=tf.string),
            "1d_vector_feature": tf.FixedLenFeature(shape=[1], dtype=tf.string),
            "2d_vector_feature": tf.FixedLenFeature(shape=[2, 2], dtype=tf.float32),
            "sparse_feature": tf.SparseFeature("idx", "value", tf.float32, 10),
        }
        inferred_schema = feature_spec_to_schema(feature_spec)
        inferred_feature_spec = schema_to_feature_spec(inferred_schema)
        self.assertEqual(inferred_feature_spec, feature_spec)

Source File: test_data2.py From basenji with Apache License 2.0

5 votes

def parse_proto(example_protos):
  features = {
    'genome': tf.FixedLenFeature([1], tf.int64),
    'sequence': tf.FixedLenFeature([], tf.string),
    'target': tf.FixedLenFeature([], tf.string)
  }
  parsed_features = tf.parse_example(example_protos, features=features)
  genome = parsed_features['genome']
  seq = tf.decode_raw(parsed_features['sequence'], tf.uint8)
  targets = tf.decode_raw(parsed_features['target'], tf.float16)
  return {'genome': genome, 'sequence': seq, 'target': targets}

Source File: reader.py From CycleGAN-TensorFlow with MIT License

5 votes

def feed(self):
    """
    Returns:
      images: 4D tensor [batch_size, image_width, image_height, image_depth]
    """
    with tf.name_scope(self.name):
      filename_queue = tf.train.string_input_producer([self.tfrecords_file])
      reader = tf.TFRecordReader()

      _, serialized_example = self.reader.read(filename_queue)
      features = tf.parse_single_example(
          serialized_example,
          features={
            'image/file_name': tf.FixedLenFeature([], tf.string),
            'image/encoded_image': tf.FixedLenFeature([], tf.string),
          })

      image_buffer = features['image/encoded_image']
      image = tf.image.decode_jpeg(image_buffer, channels=3)
      image = self._preprocess(image)
      images = tf.train.shuffle_batch(
            [image], batch_size=self.batch_size, num_threads=self.num_threads,
            capacity=self.min_queue_examples + 3*self.batch_size,
            min_after_dequeue=self.min_queue_examples
          )

      tf.summary.image('_input', images)
    return images

Source File: test_data.py From basenji with Apache License 2.0

5 votes

def parse_proto(example_protos):
  features = {
    tfrecord_util.TFR_INPUT: tf.FixedLenFeature([], tf.string),
    tfrecord_util.TFR_OUTPUT: tf.FixedLenFeature([], tf.string)
  }
  parsed_features = tf.parse_example(example_protos, features=features)
  seq = tf.decode_raw(parsed_features[tfrecord_util.TFR_INPUT], tf.uint8)
  targets = tf.decode_raw(parsed_features[tfrecord_util.TFR_OUTPUT], tf.float16)
  return {tfrecord_util.TFR_INPUT: seq, tfrecord_util.TFR_OUTPUT: targets}

Source File: tfr_qc.py From basenji with Apache License 2.0

5 votes

def parse_proto(example_protos):
  features = {
    'genome': tf.FixedLenFeature([1], tf.int64),
    'sequence': tf.FixedLenFeature([], tf.string),
    'target': tf.FixedLenFeature([], tf.string)
  }
  parsed_features = tf.parse_example(example_protos, features=features)
  seq = tf.decode_raw(parsed_features['sequence'], tf.uint8)
  targets = tf.decode_raw(parsed_features['target'], tf.float16)
  return {'sequence': seq, 'targets': targets}


################################################################################
# __main__
################################################################################

Source File: tfr_bw.py From basenji with Apache License 2.0

5 votes

def parse_proto(example_protos):
    features = {
        'genome': tf.FixedLenFeature([1], tf.int64),
        'sequence': tf.FixedLenFeature([], tf.string),
        'target': tf.FixedLenFeature([], tf.string)
    }
    parsed_features = tf.parse_example(example_protos, features=features)
    genome = parsed_features['genome']
    seq = tf.decode_raw(parsed_features['sequence'], tf.uint8)
    targets = tf.decode_raw(parsed_features['target'], tf.float16)
    return {'genome': genome, 'sequence': seq, 'target': targets}

################################################################################
# __main__
################################################################################

Source File: test_augment.py From basenji with Apache License 2.0

5 votes

def make_data_op(tfr_pattern, seq_length, target_length):
  dataset = tf.data.Dataset.list_files(tfr_pattern)

  def file_to_records(filename):
    return tf.data.TFRecordDataset(filename, compression_type='ZLIB')
  dataset = dataset.flat_map(file_to_records)

  def parse_proto(example_protos):
    features = {
      tfrecord_util.TFR_INPUT: tf.FixedLenFeature([], tf.string),
      tfrecord_util.TFR_OUTPUT: tf.FixedLenFeature([], tf.string)
    }
    parsed_features = tf.parse_example(example_protos, features=features)

    seq = tf.decode_raw(parsed_features[tfrecord_util.TFR_INPUT], tf.uint8)
    seq = tf.reshape(seq, [1, seq_length, -1])
    seq = tf.cast(seq, tf.float32)

    targets = tf.decode_raw(parsed_features[tfrecord_util.TFR_OUTPUT], tf.float16)
    targets = tf.reshape(targets, (1, target_length, -1))
    targets = tf.cast(targets, tf.float32)

    na = tf.zeros(targets.shape[:-1], dtype=tf.bool)

    return {'sequence': seq, 'label': targets, 'na':na}

  dataset = dataset.batch(1)
  dataset = dataset.map(parse_proto)

  iterator = dataset.make_one_shot_iterator()
  try:
    next_op = iterator.get_next()
  except tf.errors.OutOfRangeError:
    print('TFRecord pattern %s is empty' % self.tfr_pattern, file=sys.stderr)
    exit(1)

  return next_op

################################################################################
# __main__
################################################################################

Source File: dataset.py From rcan-tensorflow with MIT License

5 votes

def parse_tfr_tf(record):
        features = tf.parse_single_example(record, features={
            'shape': tf.FixedLenFeature([3], tf.int64),
            'data': tf.FixedLenFeature([], tf.string)})
        data = tf.decode_raw(features['data'], tf.uint8)
        return tf.reshape(data, features['shape'])

Source File: reader.py From CapsLayer with Apache License 2.0

5 votes

def parse_fun(serialized_example):
    """ Data parsing function.
    """
    features = tf.parse_single_example(serialized_example,
                                       features={'image': tf.FixedLenFeature([], tf.string),
                                                 'label': tf.FixedLenFeature([], tf.int64)})
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, shape=[32 * 32 * 3])
    image.set_shape([32 * 32 * 3])
    image = tf.cast(image, tf.float32) * (1. / 255)
    label = tf.cast(features['label'], tf.int32)
    features = {'images': image, 'labels': label}
    return(features)

Source File: example_decoders_test.py From spotify-tensorflow with Apache License 2.0

5 votes

def test_example_with_feature_spec_decoder(self):
        feature_spec = {
            "scalar_feature_1": tf.FixedLenFeature(shape=[], dtype=tf.int64),
            "scalar_feature_2": tf.FixedLenFeature(shape=[], dtype=tf.int64),
            "scalar_feature_3": tf.FixedLenFeature(shape=[], dtype=tf.float32),
            "varlen_feature_1": tf.VarLenFeature(dtype=tf.float32),
            "varlen_feature_2": tf.VarLenFeature(dtype=tf.string),
            "1d_vector_feature": tf.FixedLenFeature(shape=[1], dtype=tf.string),
            "2d_vector_feature": tf.FixedLenFeature(shape=[2, 2], dtype=tf.float32),
            "sparse_feature": tf.SparseFeature("sparse_feature_idx", "sparse_feature_value",
                                               tf.float32, 10),
        }

        dec = ExampleWithFeatureSpecDecoder(feature_spec)
        actual_json = json.loads(dec.to_json(self.example_str))
        expected_decoded = {
            "scalar_feature_1": 12,
            "scalar_feature_2": 12,
            "scalar_feature_3": 1.0,
            "varlen_feature_1": [89.0],
            "1d_vector_feature": ["this is a ,text"],
            "2d_vector_feature": [[1.0, 2.0], [3.0, 4.0]],
            "varlen_feature_2": ["female"],
            "sparse_feature_idx": [1, 4],
            "sparse_feature_value": [12.0, 20.0],
        }
        self.assertEqual(actual_json, expected_decoded)

Source File: dataset_test.py From spotify-tensorflow with Apache License 2.0

5 votes

def _write_test_data():
        schema = feature_spec_to_schema({"f1": tf.FixedLenFeature((), tf.int64),
                                         "f2": tf.FixedLenFeature((), tf.int64)})
        values = [{"f1": 1, "f2": 2}]

        example_proto = [example_pb2.Example(features=feature_pb2.Features(feature={
            k: feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[v]))
            for k, v in d.items()
        })) for d in values]

        return DataUtil.write_test_data(example_proto, schema)

Source File: tf_io_pipline_tools.py From lanenet-lane-detection with Apache License 2.0

5 votes

def decode(serialized_example):
    """
    Parses an image and label from the given `serialized_example`
    :param serialized_example:
    :return:
    """
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'gt_image_raw': tf.FixedLenFeature([], tf.string),
            'gt_binary_image_raw': tf.FixedLenFeature([], tf.string),
            'gt_instance_image_raw': tf.FixedLenFeature([], tf.string)
        })

    # decode gt image
    gt_image_shape = tf.stack([RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 3])
    gt_image = tf.decode_raw(features['gt_image_raw'], tf.uint8)
    gt_image = tf.reshape(gt_image, gt_image_shape)

    # decode gt binary image
    gt_binary_image_shape = tf.stack([RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 1])
    gt_binary_image = tf.decode_raw(features['gt_binary_image_raw'], tf.uint8)
    gt_binary_image = tf.reshape(gt_binary_image, gt_binary_image_shape)

    # decode gt instance image
    gt_instance_image_shape = tf.stack([RESIZE_IMAGE_HEIGHT, RESIZE_IMAGE_WIDTH, 1])
    gt_instance_image = tf.decode_raw(features['gt_instance_image_raw'], tf.uint8)
    gt_instance_image = tf.reshape(gt_instance_image, gt_instance_image_shape)

    return gt_image, gt_binary_image, gt_instance_image

Source File: dataset.py From spotify-tensorflow with Apache License 2.0

5 votes

def parse_schema(cls, schema_path):
        # type: (str) -> Tuple[Dict[str, Union[tf.FixedLenFeature, tf.VarLenFeature, tf.SparseFeature]], Schema]  # noqa: E501
        """
        Returns TensorFlow Feature Spec and parsed tf.metadata Schema for given tf.metadata Schema.

        :param schema_path: tf.metadata Schema path
        """
        schema = parse_schema_file(schema_path)
        return schema_to_feature_spec(schema), schema

Source File: cifar10_tf.py From deep_architect with MIT License

5 votes

def parser(self, serialized_example):
        """Parses a single tf.Example into image and label tensors."""
        # Dimensions of the images in the CIFAR-10 dataset.
        # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
        # input format.
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'image':
                                               tf.FixedLenFeature([],
                                                                  tf.string),
                                               'label':
                                               tf.FixedLenFeature([], tf.int64),
                                           })
        image = tf.decode_raw(features['image'], tf.uint8)
        image.set_shape([DEPTH * HEIGHT * WIDTH])

        # Reshape from [depth * height * width] to [depth, height, width].
        image = tf.cast(
            tf.transpose(tf.reshape(image, [DEPTH, HEIGHT, WIDTH]), [1, 2, 0]),
            tf.float32)
        label = tf.cast(features['label'], tf.int32)

        # # Custom preprocessing.
        # image = self.preprocess(image)

        return image, label

Source File: dataset.py From spotify-tensorflow with Apache License 2.0

5 votes

def parse_schema_from_stats(cls, stats_path):
        # type: (str) -> Tuple[Dict[str, Union[tf.FixedLenFeature, tf.VarLenFeature, tf.SparseFeature]], Schema]  # noqa: E501
        """
        Returns TensorFlow Feature Spec and parsed tf.metadata Schema for given tf.metadata
        DatasetFeatureStatisticsList.

        :param stats_path: tf.metadata DatasetFeatureStatisticsList path
        """
        import tensorflow_data_validation as tfdv
        stats = tfdv.load_statistics(stats_path)
        schema = tfdv.infer_schema(stats)
        return schema_to_feature_spec(schema), schema

Source File: dataset.py From spotify-tensorflow with Apache License 2.0

5 votes

def _examples(cls,
                      file_pattern,  # type: str
                      schema_path=None,  # type: str
                      feature_spec=None,  # type: Dict[str, Union[tf.FixedLenFeature, tf.VarLenFeature, tf.SparseFeature]]  # noqa: E501
                      default_value=0,  # type: float
                      compression_type=None,  # type: str
                      batch_size=128,  # type: int
                      shuffle=True,  # type: bool
                      num_epochs=1,  # type: int
                      shuffle_buffer_size=10000,  # type: int
                      shuffle_seed=None,  # type: int
                      prefetch_buffer_size=1,  # type: int
                      reader_num_threads=1,  # type: int
                      parser_num_threads=2,  # type: int
                      sloppy_ordering=False,  # type: bool
                      drop_final_batch=False  # type: bool
                      ):
            # type: (...) -> Iterator[pd.DataFrame]
            Datasets._assert_eager("DataFrame")
            dataset = Datasets.dict._examples(file_pattern=file_pattern,
                                              schema_path=schema_path,
                                              default_value=default_value,
                                              feature_spec=feature_spec,
                                              compression_type=compression_type,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              num_epochs=num_epochs,
                                              shuffle_buffer_size=shuffle_buffer_size,
                                              shuffle_seed=shuffle_seed,
                                              prefetch_buffer_size=prefetch_buffer_size,
                                              reader_num_threads=reader_num_threads,
                                              parser_num_threads=parser_num_threads,
                                              sloppy_ordering=sloppy_ordering,
                                              drop_final_batch=drop_final_batch)
            for d in dataset:
                yield pd.DataFrame(data=d)

Source File: dataset.py From spotify-tensorflow with Apache License 2.0

5 votes

def _examples(cls,
                  file_pattern,  # type: str
                  schema_path=None,  # type: str
                  feature_spec=None,  # type: Dict[str, Union[tf.FixedLenFeature, tf.VarLenFeature, tf.SparseFeature]]  # noqa: E501
                  compression_type=None,  # type: str
                  batch_size=128,  # type: int
                  shuffle=True,  # type: bool
                  num_epochs=1,  # type: int
                  shuffle_buffer_size=10000,  # type: int
                  shuffle_seed=None,  # type: int
                  prefetch_buffer_size=1,  # type: int
                  reader_num_threads=1,  # type: int
                  parser_num_threads=2,  # type: int
                  sloppy_ordering=False,  # type: bool
                  drop_final_batch=False  # type: bool
                  ):
        # type: (...) -> tf.data.Dataset

        if schema_path:
            feature_spec, _ = cls.parse_schema(schema_path)

        logger.debug("Will parse features from: `%s`, using features spec: %s",
                     file_pattern,
                     str(feature_spec))

        from tensorflow.contrib.data import make_batched_features_dataset
        reader_args = [compression_type] if compression_type else None
        dataset = make_batched_features_dataset(file_pattern,
                                                batch_size=batch_size,
                                                features=feature_spec,
                                                reader_args=reader_args,
                                                num_epochs=num_epochs,
                                                shuffle=shuffle,
                                                shuffle_buffer_size=shuffle_buffer_size,
                                                shuffle_seed=shuffle_seed,
                                                prefetch_buffer_size=prefetch_buffer_size,
                                                reader_num_threads=reader_num_threads,
                                                parser_num_threads=parser_num_threads,
                                                sloppy_ordering=sloppy_ordering,
                                                drop_final_batch=drop_final_batch)
        return dataset

Python tensorflow.FixedLenFeature() Examples