Python tensorflow_datasets.builder() Examples
The following are 30 code examples of tensorflow_datasets.builder(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the tensorflow_datasets module.
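Before diving into the examples, here is a minimal sketch of the typical tfds.builder() workflow: construct a builder by name, download and prepare the data, inspect its DatasetInfo, and read a split as a tf.data.Dataset. The dataset name "mnist" and the batch size below are placeholder choices for illustration, not taken from any particular example.

import tensorflow_datasets as tfds

# Construct a builder by name (the name is a placeholder here).
builder = tfds.builder("mnist")
builder.download_and_prepare()  # Downloads and writes the dataset to the TFDS data dir.

# DatasetInfo exposes splits, features, and other metadata.
info = builder.info
num_train = info.splits["train"].num_examples
num_classes = info.features["label"].num_classes

# Read a split as a tf.data.Dataset of feature dicts, e.g. {"image": ..., "label": ...}.
ds_train = builder.as_dataset(split="train", shuffle_files=True)
ds_train = ds_train.batch(32)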
Example #1
Source File: datasets.py From mobilenetv3-tensorflow with Apache License 2.0 | 6 votes |
def build_dataset(
        shape: Tuple[int, int],
        name: str = "mnist",
        train_batch_size: int = 32,
        valid_batch_size: int = 32
):
    dataset = {}
    builder = tfds.builder(name)
    dataset["num_train"] = builder.info.splits['train'].num_examples
    dataset["num_test"] = builder.info.splits['test'].num_examples

    [ds_train, ds_test], info = tfds.load(name=name, split=["train", "test"], with_info=True)
    dataset["num_classes"] = info.features["label"].num_classes
    dataset["channels"] = ds_train.output_shapes["image"][-1].value

    ds_train = ds_train.shuffle(1024).repeat()
    ds_train = ds_train.map(
        lambda data: _parse_function(data, shape, dataset["num_classes"], dataset["channels"]))
    dataset["train"] = ds_train.batch(train_batch_size)

    ds_test = ds_test.shuffle(1024).repeat()
    ds_test = ds_test.map(
        lambda data: _parse_function(data, shape, dataset["num_classes"], dataset["channels"]))
    dataset["test"] = ds_test.batch(valid_batch_size)

    return dataset
Example #2
Source File: post_training_quantization.py From models with Apache License 2.0 | 6 votes |
def _representative_dataset_gen():
    """Gets a python generator of numpy arrays for the given dataset."""
    image_size = FLAGS.image_size
    dataset = tfds.builder(FLAGS.dataset_name, data_dir=FLAGS.dataset_dir)
    dataset.download_and_prepare()
    data = dataset.as_dataset()[FLAGS.dataset_split]
    iterator = tf.data.make_one_shot_iterator(data)
    if FLAGS.use_model_specific_preprocessing:
        preprocess_fn = functools.partial(
            preprocessing_factory.get_preprocessing(name=FLAGS.model_name),
            output_height=image_size,
            output_width=image_size)
    else:
        preprocess_fn = functools.partial(
            _preprocess_for_quantization, image_size=image_size)
    features = iterator.get_next()
    image = features["image"]
    image = preprocess_fn(image)
    image = tf.reshape(image, [1, image_size, image_size, 3])
    for _ in range(FLAGS.num_steps):
        yield [image.eval()]
Example #3
Source File: tfds.py From blueoil with Apache License 2.0 | 6 votes |
def count_max_boxes(cls, builder):
    sess = tf.compat.v1.Session()
    max_boxes = 0

    for split in builder.info.splits:
        tf_dataset = builder.as_dataset(split=split)
        iterator = tf.compat.v1.data.make_one_shot_iterator(tf_dataset)
        next_batch = iterator.get_next()

        while True:
            try:
                data = sess.run(next_batch)
                if max_boxes < data["objects"]["label"].shape[0]:
                    max_boxes = data["objects"]["label"].shape[0]
            except tf.errors.OutOfRangeError:
                break

    return max_boxes
Example #4
Source File: document_datasets.py From datasets with Apache License 2.0 | 6 votes |
def document_single_builder(builder):
    """Doc string for a single builder, with or without configs."""
    print('Document builder %s...' % builder.name)
    get_config_builder = lambda config: tfds.builder(builder.name, config=config)
    config_builders = []
    if builder.builder_configs:
        with futures.ThreadPoolExecutor(max_workers=WORKER_COUNT_CONFIGS) as tpool:
            config_builders = list(
                tpool.map(get_config_builder, builder.BUILDER_CONFIGS))
    tmpl = get_mako_template('dataset')
    visu_doc_util = VisualizationDocUtil()
    out_str = tmpl.render_unicode(
        builder=builder,
        config_builders=config_builders,
        visu_doc_util=visu_doc_util,
        nightly_doc_util=NightlyDocUtil(),
    ).strip()
    schema_org_tmpl = get_mako_template('schema_org')
    schema_org_out_str = schema_org_tmpl.render_unicode(
        builder=builder,
        config_builders=config_builders,
        visu_doc_util=visu_doc_util,
    ).strip()
    out_str = schema_org_out_str + '\n' + out_str
    return out_str
Example #5
Source File: generate_visualization.py From datasets with Apache License 2.0 | 6 votes |
def _get_full_names(datasets: Optional[List[str]] = None) -> List[str]:
    """List all builder names `ds/version` and `ds/config/version` to generate.

    Args:
        datasets: List of datasets from which to get the builder names.

    Returns:
        builder_names: The builder names.
    """
    if datasets is None:
        return tfds.core.registered.list_full_names(
            current_version_only=True,
        )
    else:
        builder_names = list(itertools.chain.from_iterable([
            tfds.core.registered.single_full_names(builder_name)
            for builder_name in datasets
        ]))
        return builder_names
Example #6
Source File: oxford_iiit_pet.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, data_dir=None): dataset_builder = tfds.builder("oxford_iiit_pet:3.*.*", data_dir=data_dir) dataset_builder.download_and_prepare() # Defines dataset specific train/val/trainval/test splits. tfds_splits = {} tfds_splits["train"] = "train[:{}%]".format(TRAIN_SPLIT_PERCENT) tfds_splits["val"] = "train[{}%:]".format(TRAIN_SPLIT_PERCENT) tfds_splits["trainval"] = tfds.Split.TRAIN tfds_splits["test"] = tfds.Split.TEST # Creates a dict with example counts for each split. num_samples_splits = {} trainval_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples num_samples_splits["train"] = (TRAIN_SPLIT_PERCENT * trainval_count) // 100 num_samples_splits["val"] = trainval_count - num_samples_splits["train"] num_samples_splits["trainval"] = trainval_count num_samples_splits["test"] = test_count super(OxfordIIITPetData, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, # Note: Export only image and label tensors with their original types. base_preprocess_fn=base.make_get_tensors_fn(["image", "label"]), num_classes=dataset_builder.info.features["label"].num_classes)
Example #7
Source File: utils.py From text-to-text-transfer-transformer with Apache License 2.0 | 5 votes |
def files(self, split):
    """Returns set of instructions for reading TFDS files for the dataset."""
    split = self._map_split(split)

    if "/" not in self.name and self.builder.BUILDER_CONFIGS:
        # If builder has multiple configs, and no particular config was
        # requested, raise an error.
        raise ValueError("Dataset '%s' has multiple configs." % self.name)

    split_info = self.builder.info.splits[split]
    files = split_info.file_instructions

    if not files:
        logging.fatal("No TFRecord files found for dataset: %s", self.name)
    return files
Example #8
Source File: utils.py From text-to-text-transfer-transformer with Apache License 2.0 | 5 votes |
def load_shard(self, file_instruction):
    """Returns a dataset for a single shard of the TFDS TFRecord files."""
    ds = self.builder._tfrecords_reader.read_files(  # pylint:disable=protected-access
        [file_instruction],
        read_config=tfds.ReadConfig(),
        shuffle_files=False)
    return ds
Example #9
Source File: patch_camelyon.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, data_dir=None): dataset_builder = tfds.builder("patch_camelyon:2.*.*", data_dir=data_dir) dataset_builder.download_and_prepare() # Defines dataset specific train/val/trainval/test splits. tfds_splits = { "test": "test", "train": "train", "val": "validation", "trainval": "train+validation", } # Creates a dict with example counts. num_samples_splits = { "test": dataset_builder.info.splits["test"].num_examples, "train": dataset_builder.info.splits["train"].num_examples, "val": dataset_builder.info.splits["validation"].num_examples, } num_samples_splits["trainval"] = ( num_samples_splits["train"] + num_samples_splits["val"]) super(PatchCamelyonData, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, # Note: Export only image and label tensors with their original types. base_preprocess_fn=base.make_get_tensors_fn(["image", "label"]), num_classes=dataset_builder.info.features["label"].num_classes)
Example #10
Source File: sun397.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, config="tfds", data_dir=None): if config == "tfds": dataset_builder = tfds.builder("sun397/tfds:4.*.*", data_dir=data_dir) dataset_builder.download_and_prepare() tfds_splits = { "train": "train", "val": "validation", "test": "test", "trainval": "train+validation", } # Creates a dict with example counts. num_samples_splits = { "test": dataset_builder.info.splits["test"].num_examples, "train": dataset_builder.info.splits["train"].num_examples, "val": dataset_builder.info.splits["validation"].num_examples, } num_samples_splits["trainval"] = ( num_samples_splits["train"] + num_samples_splits["val"]) else: raise ValueError("No supported config %r for Sun397Data." % config) super(Sun397Data, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, # Note: Export only image and label tensors with their original types. base_preprocess_fn=base.make_get_tensors_fn(["image", "label"]), num_classes=dataset_builder.info.features["label"].num_classes)
Example #11
Source File: kitti.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, task, data_dir=None):
    if task not in _TASK_DICT:
        raise ValueError("Unknown task: %s" % task)

    dataset_builder = tfds.builder("kitti:3.1.0", data_dir=data_dir)
    dataset_builder.download_and_prepare()

    tfds_splits = {
        "train": "train",
        "val": "validation",
        "trainval": "train+validation",
        "test": "test",
    }

    # Example counts are retrieved from the tensorflow dataset info.
    num_examples = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
    train_count = num_examples * TRAIN_SPLIT_PERCENT // 100
    val_count = num_examples * VALIDATION_SPLIT_PERCENT // 100
    test_count = num_examples * TEST_SPLIT_PERCENT // 100

    # Creates a dict with example counts for each split.
    num_samples_splits = {
        "train": train_count,
        "val": val_count,
        "trainval": train_count + val_count,
        "test": test_count,
    }

    task = _TASK_DICT[task]
    base_preprocess_fn = task["preprocess_fn"]

    super(KittiData, self).__init__(
        dataset_builder=dataset_builder,
        tfds_splits=tfds_splits,
        num_samples_splits=num_samples_splits,
        num_preprocessing_threads=400,
        shuffle_buffer_size=10000,
        base_preprocess_fn=base_preprocess_fn,
        num_classes=task["num_classes"])
Example #12
Source File: dmlab.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, data_dir=None): dataset_builder = tfds.builder("dmlab:2.0.0", data_dir=data_dir) tfds_splits = { "train": "train", "val": "validation", "trainval": "train+validation", "test": "test" } # Example counts are retrieved from the tensorflow dataset info. train_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples val_count = dataset_builder.info.splits[tfds.Split.VALIDATION].num_examples test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples # Creates a dict with example counts for each split. num_samples_splits = { "train": train_count, "val": val_count, "trainval": train_count + val_count, "test": test_count } super(DmlabData, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, base_preprocess_fn=base.make_get_and_cast_tensors_fn({ "image": ("image", None), "label": ("label", None), }), num_classes=dataset_builder.info.features["label"].num_classes, image_key="image")
Example #13
Source File: oxford_flowers102.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, data_dir=None): dataset_builder = tfds.builder("oxford_flowers102:2.*.*", data_dir=data_dir) dataset_builder.download_and_prepare() tfds_splits = { "train": "train", "val": "validation", "trainval": "train+validation", "test": "test", } # Example counts are retrieved from the tensorflow dataset info. train_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples val_count = dataset_builder.info.splits[tfds.Split.VALIDATION].num_examples test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples # Creates a dict with example counts for each split. num_samples_splits = { "train": train_count, "val": val_count, "trainval": train_count + val_count, "test": test_count } super(OxfordFlowers102Data, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, # Note: Rename tensors but keep their original types. base_preprocess_fn=base.make_get_and_cast_tensors_fn({ "image": ("image", None), "label": ("label", None), }), num_classes=dataset_builder.info.features["label"] .num_classes)
Example #14
Source File: dtd.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, data_dir=None): dataset_builder = tfds.builder("dtd:3.*.*", data_dir=data_dir) dataset_builder.download_and_prepare() # Defines dataset specific train/val/trainval/test splits. tfds_splits = {} tfds_splits["train"] = "train" tfds_splits["val"] = "validation" tfds_splits["trainval"] = "train+validation" tfds_splits["test"] = "test" # Creates a dict with example counts for each split. num_samples_splits = {} train_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples val_count = dataset_builder.info.splits[tfds.Split.VALIDATION].num_examples test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples num_samples_splits["train"] = train_count num_samples_splits["val"] = val_count num_samples_splits["trainval"] = train_count + val_count num_samples_splits["test"] = test_count super(DTDData, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, # Note: Export only image and label tensors with their original types. base_preprocess_fn=base.make_get_tensors_fn(["image", "label"]), num_classes=dataset_builder.info.features["label"].num_classes)
Example #15
Source File: utils.py From text-to-text-transfer-transformer with Apache License 2.0 | 5 votes |
def info(self):
    return self.builder.info
Example #16
Source File: imagenet.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, features=("image", "label")):
    dataset_builder = tfds.builder("imagenet2012:5.*.*")

    # Defines dataset specific train/val/trainval/test splits.
    # Note, that the test split for "imagenet2012" dataset is not available.
    # Thus, we use the val split as test. Moreover, we split the train split
    # into two parts: new train split and new val split.
    tfds_splits = {}
    tfds_splits["train"] = "train[:{}%]".format(TRAIN_SPLIT_PERCENT)
    tfds_splits["val"] = "train[{}%:]".format(TRAIN_SPLIT_PERCENT)
    tfds_splits["trainval"] = "train"
    tfds_splits["test"] = "validation"

    # Creates a dict with example counts.
    num_samples_splits = {}
    trainval_count = dataset_builder.info.splits["train"].num_examples
    test_count = dataset_builder.info.splits["validation"].num_examples
    num_samples_splits["train"] = (TRAIN_SPLIT_PERCENT * trainval_count) // 100
    num_samples_splits["val"] = trainval_count - num_samples_splits["train"]
    num_samples_splits["trainval"] = trainval_count
    num_samples_splits["test"] = test_count

    super(ImageNetData, self).__init__(
        dataset_builder=dataset_builder,
        tfds_splits=tfds_splits,
        num_samples_splits=num_samples_splits,
        num_preprocessing_threads=400,
        shuffle_buffer_size=10000,
        # Note: Export only image and label tensors with their original types.
        base_preprocess_fn=base.make_get_tensors_fn(features),
        filter_fn=self._get_filter_fn(),
        num_classes=dataset_builder.info.features["label"].num_classes)
Example #17
Source File: svhn.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, data_dir=None): dataset_builder = tfds.builder("svhn_cropped:3.*.*", data_dir=data_dir) dataset_builder.download_and_prepare() # Defines dataset specific train/val/trainval/test splits. # The validation set is split out of the original training set, and the # remaining examples are used as the "train" split. The "trainval" split # corresponds to the original training set. tfds_splits = { "train": "train[:{}%]".format(TRAIN_SPLIT_PERCENT), "val": "train[{}%:]".format(TRAIN_SPLIT_PERCENT), "trainval": "train", "test": "test", } # Example counts are retrieved from the tensorflow dataset info. trainval_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples # Creates a dict with example counts for each split. num_samples_splits = { # Calculates the train/val split example count based on percent. "train": TRAIN_SPLIT_PERCENT * trainval_count // 100, "val": trainval_count - TRAIN_SPLIT_PERCENT * trainval_count // 100, "trainval": trainval_count, "test": test_count } super(SvhnData, self).__init__( dataset_builder=dataset_builder, tfds_splits=tfds_splits, num_samples_splits=num_samples_splits, num_preprocessing_threads=400, shuffle_buffer_size=10000, # Note: Rename tensors but keep their original types. base_preprocess_fn=base.make_get_and_cast_tensors_fn({ "image": ("image", None), "label": ("label", None), }), num_classes=dataset_builder.info.features["label"] .num_classes)
Example #18
Source File: caltech.py From task_adaptation with Apache License 2.0 | 5 votes |
def __init__(self, num_classes=10, data_dir=None):
    dataset_builder = tfds.builder("caltech101:3.*.*", data_dir=data_dir)
    dataset_builder.download_and_prepare()

    # Defines dataset specific train/val/trainval/test splits.
    tfds_splits = {}
    tfds_splits["train"] = "train[:{}%]".format(_TRAIN_SPLIT_PERCENT)
    tfds_splits["val"] = "train[{}%:]".format(_TRAIN_SPLIT_PERCENT)
    tfds_splits["trainval"] = "train"
    tfds_splits["test"] = "test"

    # Creates a dict with example counts for each split.
    trainval_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
    train_count = (_TRAIN_SPLIT_PERCENT * trainval_count) // 100
    test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples
    num_samples_splits = dict(
        train=train_count,
        val=trainval_count - train_count,
        trainval=trainval_count,
        test=test_count)

    super(Caltech101, self).__init__(
        dataset_builder=dataset_builder,
        tfds_splits=tfds_splits,
        num_samples_splits=num_samples_splits,
        num_preprocessing_threads=400,
        shuffle_buffer_size=3000,
        base_preprocess_fn=base.make_get_tensors_fn(("image", "label")),
        num_classes=dataset_builder.info.features["label"].num_classes)
Example #19
Source File: dataset_factory.py From models with Apache License 2.0 | 5 votes |
def __init__(self, config: DatasetConfig, **overrides: Any):
    """Initialize the builder from the config."""
    self.config = config.replace(**overrides)
    self.builder_info = None

    if self.config.augmenter is not None:
        logging.info('Using augmentation: %s', self.config.augmenter.name)
        self.augmenter = self.config.augmenter.build()
    else:
        self.augmenter = None
Example #20
Source File: dataset_factory.py From models with Apache License 2.0 | 5 votes |
def info(self) -> tfds.core.DatasetInfo:
    """The TFDS dataset info, if available."""
    if self.builder_info is None:
        self.builder_info = tfds.builder(self.config.name).info
    return self.builder_info
Example #21
Source File: dataset_factory.py From models with Apache License 2.0 | 5 votes |
def _build(self,
           input_context: tf.distribute.InputContext = None
           ) -> tf.data.Dataset:
    """Construct a dataset end-to-end and return it.

    Args:
        input_context: An optional context provided by `tf.distribute` for
            cross-replica training.

    Returns:
        A TensorFlow dataset outputting batched images and labels.
    """
    builders = {
        'tfds': self.load_tfds,
        'records': self.load_records,
        'synthetic': self.load_synthetic,
    }

    builder = builders.get(self.config.builder, None)
    if builder is None:
        raise ValueError('Unknown builder type {}'.format(self.config.builder))

    self.input_context = input_context
    dataset = builder()
    dataset = self.pipeline(dataset)

    return dataset
Example #22
Source File: dataset_factory.py From models with Apache License 2.0 | 5 votes |
def load_tfds(self) -> tf.data.Dataset:
    """Return a dataset loading files from TFDS."""
    logging.info('Using TFDS to load data.')

    builder = tfds.builder(self.config.name, data_dir=self.config.data_dir)

    if self.config.download:
        builder.download_and_prepare()

    decoders = {}
    if self.config.skip_decoding:
        decoders['image'] = tfds.decode.SkipDecoding()

    read_config = tfds.ReadConfig(
        interleave_cycle_length=10,
        interleave_block_length=1,
        input_context=self.input_context)

    dataset = builder.as_dataset(
        split=self.config.split,
        as_supervised=True,
        shuffle_files=True,
        decoders=decoders,
        read_config=read_config)

    return dataset
Example #23
Source File: vocabulary.py From mesh with Apache License 2.0 | 5 votes |
def get_tfds_vocabulary(dataset_name=gin.REQUIRED):
    info = tfds.builder(dataset_name).info
    # this assumes that either there are no inputs, or that the
    # inputs and targets have the same vocabulary.
    return TFDSVocabulary(info.features[info.supervised_keys[1]].encoder)
Example #24
Source File: t2t.py From BERT with Apache License 2.0 | 5 votes |
def train_and_eval_dataset(dataset_name, data_dir):
    """Return train and evaluation datasets, feature info and supervised keys.

    Args:
        dataset_name: a string, the name of the dataset; if it starts with "v1_"
            then we'll search T2T Problem registry for it, otherwise we assume it
            is a dataset from TFDS and load it from there.
        data_dir: directory where the data is located.

    Returns:
        a 4-tuple consisting of:
            * the train tf.data.Dataset
            * the eval tf.data.Dataset
            * information about features: a python dictionary with feature names
              as keys and an object as value that provides .shape and .num_classes.
            * supervised_keys: information about what is the input and what is the
              target, i.e., a pair of lists with input and target feature names.
    """
    if dataset_name.startswith("v1_"):
        return _train_and_eval_dataset_v1(dataset_name[3:], data_dir)
    dataset_builder = tfds.builder(dataset_name, data_dir=data_dir)
    info = dataset_builder.info
    splits = dataset_builder.info.splits
    if tfds.Split.TRAIN not in splits:
        raise ValueError("To train we require a train split in the dataset.")
    if tfds.Split.VALIDATION not in splits and "test" not in splits:
        raise ValueError("We require a validation or test split in the dataset.")
    eval_split = tfds.Split.VALIDATION
    if tfds.Split.VALIDATION not in splits:
        eval_split = tfds.Split.TEST
    train, valid = tfds.load(
        name=dataset_name, split=[tfds.Split.TRAIN, eval_split])
    keys = None
    if info.supervised_keys:
        keys = ([info.supervised_keys[0]], [info.supervised_keys[1]])
    return train, valid, info.features, keys
Example #25
Source File: tf_inputs.py From trax with Apache License 2.0 | 5 votes |
def download_and_prepare(dataset_name, data_dir):
    """Downloads and prepares T2T or TFDS dataset.

    Args:
        dataset_name: tfds dataset or t2t problem name prefixed by 't2t_'.
        data_dir: location of existing dataset or None.

    Returns:
        data_dir: path string of downloaded data.
    """
    if not data_dir:
        data_dir = os.path.expanduser('~/tensorflow_datasets/')
        dl_dir = os.path.join(data_dir, 'download')
        logging.info(
            'No dataset directory provided. '
            'Downloading and generating dataset for %s inside data directory %s '
            'For large datasets it is better to prepare datasets manually!',
            dataset_name, data_dir)
        if dataset_name.startswith('t2t_'):
            # Download and run dataset generator for T2T problem.
            data_dir = os.path.join(data_dir, dataset_name)
            tf.io.gfile.makedirs(data_dir)
            tf.io.gfile.makedirs(dl_dir)
            t2t_problems().problem(
                dataset_name[len('t2t_'):]).generate_data(data_dir, dl_dir)
        else:
            # Download and prepare TFDS dataset.
            tfds_builder = tfds.builder(dataset_name)
            tfds_builder.download_and_prepare(download_dir=dl_dir)
    else:
        data_dir = os.path.expanduser(data_dir)
    return data_dir
Example #26
Source File: tfds.py From blueoil with Apache License 2.0 | 5 votes |
def __init__(
        self,
        name,
        data_dir,
        image_size,
        download=False,
        num_max_boxes=None,
        tfds_pre_processor=None,
        tfds_augmentor=None,
        *args,
        **kwargs
):
    super().__init__(
        *args,
        **kwargs,
    )

    if name in tfds.list_builders():
        self._builder = tfds.builder(name, data_dir=data_dir)
        if download:
            self._builder.download_and_prepare()
    else:
        if not tf.io.gfile.exists(os.path.join(data_dir, name)):
            raise ValueError("Dataset directory does not exist: {}\n"
                             "Please run `python blueoil/cmd/build_tfds.py -c <config file>` before training."
                             .format(os.path.join(data_dir, name)))
        self._builder = self.builder_class(name, data_dir=data_dir)

    self.info = self._builder.info
    self._init_available_splits()
    self._validate_feature_structure()

    self.tf_dataset = self._builder.as_dataset(split=self.available_splits[self.subset])
    self.tfds_pre_processor = tfds_pre_processor
    self.tfds_augmentor = tfds_augmentor
    self._image_size = image_size
    self._num_max_boxes = num_max_boxes
    self._format_dataset()
Example #27
Source File: download_and_prepare.py From datasets with Apache License 2.0 | 5 votes |
def download_and_prepare(builder):
    """Generate data for a given dataset."""
    logging.info("download_and_prepare for dataset %s...", builder.info.full_name)

    dl_config = download_config()

    if isinstance(builder, tfds.core.BeamBasedBuilder):
        beam = tfds.core.lazy_imports.apache_beam
        # TODO(b/129149715): Restore compute stats. Currently skipped because not
        # beam supported.
        dl_config.compute_stats = tfds.download.ComputeStatsMode.SKIP
        dl_config.beam_options = beam.options.pipeline_options.PipelineOptions(
            flags=["--%s" % opt for opt in FLAGS.beam_pipeline_options])

    if FLAGS.add_name_to_manual_dir:
        dl_config.manual_dir = os.path.join(dl_config.manual_dir, builder.name)

    builder.download_and_prepare(
        download_dir=FLAGS.download_dir,
        download_config=dl_config,
    )
    termcolor.cprint(str(builder.info.as_proto), attrs=["bold"])

    if FLAGS.debug:
        dataset = builder.as_dataset(split=tfds.Split.TRAIN)
        pdb.set_trace()
        del dataset
Example #28
Source File: document_datasets.py From datasets with Apache License 2.0 | 5 votes |
def _get_name(self, builder):
    return builder.info.full_name.replace('/', '-') + '.png'
Example #29
Source File: document_datasets.py From datasets with Apache License 2.0 | 5 votes |
def get_url(self, builder):
    return self.BASE_URL + self._get_name(builder)
Example #30
Source File: document_datasets.py From datasets with Apache License 2.0 | 5 votes |
def has_visualization(self, builder):
    filepath = os.path.join(self.BASE_PATH, self._get_name(builder))
    return tf.io.gfile.exists(filepath)