Python torchvision.datasets() Examples
The following are 30 code examples of the torchvision.datasets module.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the torchvision module, or try the search function.
Example #1
Source File: dataloader.py From imagenet18_old with The Unlicense | 7 votes |
def get_loaders(traindir, valdir, sz, bs, fp16=True, val_bs=None, workers=8, rect_val=False, min_scale=0.08, distributed=False):
    """Build train/val DataLoaders, wrapped in BatchTransformDataLoader.

    Returns (train_loader, val_loader, train_sampler, val_sampler).
    """
    val_bs = val_bs or bs

    # Training augmentation: random resized crop + horizontal flip.
    # Tensor conversion / normalization happens later (fast_collate +
    # BatchTransformDataLoader), not here.
    train_tfms = [
        transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)),
        transforms.RandomHorizontalFlip()
    ]
    train_dataset = datasets.ImageFolder(traindir, transforms.Compose(train_tfms))
    if distributed:
        train_sampler = DistributedSampler(train_dataset,
                                           num_replicas=env_world_size(),
                                           rank=env_rank())
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs, shuffle=(train_sampler is None),
        num_workers=workers, pin_memory=True, collate_fn=fast_collate,
        sampler=train_sampler)

    # Validation uses a batch_sampler, so no batch_size/shuffle here.
    val_dataset, val_sampler = create_validation_set(
        valdir, val_bs, sz, rect_val=rect_val, distributed=distributed)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, num_workers=workers, pin_memory=True,
        collate_fn=fast_collate, batch_sampler=val_sampler)

    train_loader = BatchTransformDataLoader(train_loader, fp16=fp16)
    val_loader = BatchTransformDataLoader(val_loader, fp16=fp16)
    return train_loader, val_loader, train_sampler, val_sampler
Example #2
Source File: cifarloader.py From DTC with MIT License | 6 votes |
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (image, target, index) where target is index of the target class.
    """
    target = self.targets[index]
    # Convert the raw array to a PIL Image so the transforms behave the
    # same as for all the other torchvision datasets.
    img = Image.fromarray(self.data[index])
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return img, target, index
Example #3
Source File: train_model.py From super-events-cvpr18 with MIT License | 6 votes |
def load_data(train_split, val_split, root):
    """Build train/val datasets and loaders.

    NOTE(review): ``batch_size`` and ``collate_fn`` come from module scope,
    not from the arguments.
    """
    # Load Data
    if len(train_split) > 0:
        dataset = Dataset(train_split, 'training', root, batch_size)
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=True, num_workers=8,
            pin_memory=True, collate_fn=collate_fn)
        dataloader.root = root
    else:
        # No training split requested: train entries stay None.
        dataset = None
        dataloader = None

    val_dataset = Dataset(val_split, 'testing', root, batch_size)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=True, num_workers=2,
        pin_memory=True, collate_fn=collate_fn)
    val_dataloader.root = root

    dataloaders = {'train': dataloader, 'val': val_dataloader}
    datasets = {'train': dataset, 'val': val_dataset}
    return dataloaders, datasets

# train the model
Example #4
Source File: svhnloader.py From DTC with MIT License | 6 votes |
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (image, target, index) where target is index of the target class.
    """
    target = int(self.labels[index])
    # SVHN stores images channel-first; transpose to HWC and convert to a
    # PIL Image so transforms behave like the other datasets.
    img = Image.fromarray(np.transpose(self.data[index], (1, 2, 0)))
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return img, target, index
Example #5
Source File: dataloader.py From FewShotWithoutForgetting with MIT License | 6 votes |
def __init__(self, split='train'):
    """Load the ImageNet train or val split with deterministic eval transforms.

    Args:
        split (str): 'train' or 'val'.
    """
    self.split = split
    assert(split=='train' or split=='val')
    self.name = 'ImageNet_Split_' + split
    print('Loading ImageNet dataset - split {0}'.format(split))

    transforms_list = []
    # Fix: transforms.Resize replaces the deprecated transforms.Scale
    # (removed in modern torchvision); behavior is identical.
    transforms_list.append(transforms.Resize(256))
    transforms_list.append(transforms.CenterCrop(224))
    transforms_list.append(lambda x: np.asarray(x))
    transforms_list.append(transforms.ToTensor())
    # Standard ImageNet channel statistics.
    mean_pix = [0.485, 0.456, 0.406]
    std_pix = [0.229, 0.224, 0.225]
    transforms_list.append(transforms.Normalize(mean=mean_pix, std=std_pix))
    self.transform = transforms.Compose(transforms_list)

    traindir = os.path.join(_IMAGENET_DATASET_DIR, 'train')
    valdir = os.path.join(_IMAGENET_DATASET_DIR, 'val')
    self.data = datasets.ImageFolder(
        traindir if split=='train' else valdir, self.transform)
    # Class index of every image, in dataset order.
    self.labels = [item[1] for item in self.data.imgs]
Example #6
Source File: CycleGAN.py From Deep-learning-with-cats with GNU General Public License v3.0 | 5 votes |
def __getitem__(self, i):
    """Fetch index *i* from every wrapped dataset, zipped into one tuple."""
    items = []
    for dataset in self.datasets:
        items.append(dataset[i])
    return tuple(items)
Example #7
Source File: create_image_dataloader.py From Auto-PyTorch with Apache License 2.0 | 5 votes |
def get_pipeline_config_options(self):
    """Declare the config options this pipeline node consumes."""
    return [
        ConfigOption("default_dataset_download_dir",
                     default=ConfigFileParser.get_autonet_home(),
                     type='directory',
                     info="Directory default datasets will be downloaded to."),
        ConfigOption("dataloader_worker", default=1, type=int),
        ConfigOption("dataloader_cache_size_mb", default=0, type=int),
    ]
Example #8
Source File: data.py From dnn-mode-connectivity with BSD 2-Clause "Simplified" License | 5 votes |
def loaders(dataset, path, batch_size, num_workers, transform_name, use_test=False,
            shuffle_train=True):
    """Build train/test DataLoaders for a torchvision dataset by name.

    When ``use_test`` is False, the last 5000 training samples are split
    off as a validation set (by mutating the legacy ``train_data`` /
    ``train_labels`` attributes of the dataset objects).

    Returns ({'train': loader, 'test': loader}, num_classes).
    """
    ds = getattr(torchvision.datasets, dataset)
    path = os.path.join(path, dataset.lower())
    transform = getattr(getattr(Transforms, dataset), transform_name)
    train_set = ds(path, train=True, download=True, transform=transform.train)

    if use_test:
        print('You are going to run models on the test set. Are you sure?')
        test_set = ds(path, train=False, download=True, transform=transform.test)
    else:
        print("Using train (45000) + validation (5000)")
        # Carve the validation split out of the tail of the training data.
        train_set.train_data = train_set.train_data[:-5000]
        train_set.train_labels = train_set.train_labels[:-5000]
        test_set = ds(path, train=True, download=True, transform=transform.test)
        test_set.train = False
        test_set.test_data = test_set.train_data[-5000:]
        test_set.test_labels = test_set.train_labels[-5000:]
        delattr(test_set, 'train_data')
        delattr(test_set, 'train_labels')

    # Shared DataLoader settings for both splits.
    common = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
    return {
        'train': torch.utils.data.DataLoader(train_set, shuffle=shuffle_train, **common),
        'test': torch.utils.data.DataLoader(test_set, shuffle=False, **common),
    }, max(train_set.train_labels) + 1
Example #9
Source File: main.py From binary-wide-resnet with MIT License | 5 votes |
def create_dataset(args, train):
    """Instantiate ``args.dataset`` with CIFAR-style normalization.

    Training additionally gets reflect-pad / flip / random-crop augmentation.
    """
    normalize = T.Normalize(np.array([125.3, 123.0, 113.9]) / 255.0,
                            np.array([63.0, 62.1, 66.7]) / 255.0)
    if train:
        # Augmentation runs before tensor conversion + normalization.
        transform = T.Compose([
            T.Pad(4, padding_mode='reflect'),
            T.RandomHorizontalFlip(),
            T.RandomCrop(32),
            T.ToTensor(),
            normalize,
        ])
    else:
        transform = T.Compose([T.ToTensor(), normalize])
    dataset_cls = getattr(datasets, args.dataset)
    return dataset_cls(args.dataroot, train=train, download=True, transform=transform)
Example #10
Source File: loaders.py From WatermarkNN with MIT License | 5 votes |
def getdataloader(datatype, train_db_path, test_db_path, batch_size):
    """Build CIFAR10/CIFAR100 train and test loaders.

    Returns (trainloader, testloader, n_classes), or (None, None, None)
    when *datatype* is not supported.
    """
    # get transformations
    transform_train, transform_test = _getdatatransformsdb(datatype=datatype)

    # Data loaders
    name = datatype.lower()
    if name == CIFAR10:
        print("Using CIFAR10 dataset.")
        dataset_cls = torchvision.datasets.CIFAR10
        n_classes = 10
    elif name == CIFAR100:
        print("Using CIFAR100 dataset.")
        dataset_cls = torchvision.datasets.CIFAR100
        n_classes = 100
    else:
        print("Dataset is not supported.")
        return None, None, None

    trainset = dataset_cls(root=train_db_path, train=True, download=True,
                           transform=transform_train)
    testset = dataset_cls(root=test_db_path, train=False, download=True,
                          transform=transform_test)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=4)
    return trainloader, testloader, n_classes
Example #11
Source File: dataloader.py From FewShotWithoutForgetting with MIT License | 5 votes |
def __getitem__(self, index):
    """Return the (transformed) image and its label at *index*."""
    label = self.labels[index]
    # Convert to a PIL Image so behavior matches the other datasets.
    img = Image.fromarray(self.data[index])
    if self.transform is not None:
        img = self.transform(img)
    return img, label
Example #12
Source File: dataloader.py From imagenet18_old with The Unlicense | 5 votes |
def sort_ar(valdir):
    """Return (index, aspect_ratio) pairs for validation images, sorted by ratio.

    The result is cached as a pickle file next to *valdir* and reused on
    subsequent calls.
    """
    idx2ar_file = valdir+'/../sorted_idxar.p'
    if os.path.isfile(idx2ar_file):
        # Fix: open the cache via a context manager so the file handle is
        # closed deterministically (the original leaked the handle).
        with open(idx2ar_file, 'rb') as f:
            return pickle.load(f)
    print('Creating AR indexes. Please be patient this may take a couple minutes...')
    val_dataset = datasets.ImageFolder(valdir)
    # AS: TODO: use Image.open instead of looping through dataset
    sizes = [img[0].size for img in tqdm(val_dataset, total=len(val_dataset))]
    # Aspect ratio = width / height, rounded for stable sorting/caching.
    idx_ar = [(i, round(s[0]/s[1], 5)) for i, s in enumerate(sizes)]
    sorted_idxar = sorted(idx_ar, key=lambda x: x[1])
    with open(idx2ar_file, 'wb') as f:
        pickle.dump(sorted_idxar, f)
    print('Done')
    return sorted_idxar
Example #13
Source File: dataloader.py From imagenet18_old with The Unlicense | 5 votes |
def create_validation_set(valdir, batch_size, target_size, rect_val, distributed):
    """Return (val_dataset, val_sampler), optionally using rectangular crops."""
    if rect_val:
        # Sort images by aspect ratio so each batch shares a crop shape.
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, batch_size)
        ar_tfms = [transforms.Resize(int(target_size * 1.14)),
                   CropArTfm(idx2ar, target_size)]
        val_dataset = ValDataset(valdir, transform=ar_tfms)
        val_sampler = DistValSampler(idx_sorted, batch_size=batch_size,
                                     distributed=distributed)
    else:
        # Standard square center-crop evaluation pipeline.
        val_tfms = [transforms.Resize(int(target_size * 1.14)),
                    transforms.CenterCrop(target_size)]
        val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))
        val_sampler = DistValSampler(list(range(len(val_dataset))),
                                     batch_size=batch_size,
                                     distributed=distributed)
    return val_dataset, val_sampler
Example #14
Source File: toy.py From Hydra with MIT License | 5 votes |
def toy(dataset, root='~/data/torchvision/', transforms=None):
    """Load a train and test datasets from torchvision.dataset. """
    if not hasattr(torchvision.datasets, dataset):
        raise ValueError
    loader_def = getattr(torchvision.datasets, dataset)

    # Build the transform pipeline from the given specs; ToTensor is
    # always appended last.
    transform_funcs = []
    for transform in (transforms if transforms is not None else []):
        if not hasattr(torchvision.transforms, transform['def']):
            raise ValueError
        transform_def = getattr(torchvision.transforms, transform['def'])
        transform_funcs.append(transform_def(**transform['kwargs']))
    transform_funcs.append(torchvision.transforms.ToTensor())
    composed_transform = torchvision.transforms.Compose(transform_funcs)

    expanded_root = os.path.expanduser(root)
    trainset = loader_def(root=expanded_root, train=True, download=True,
                          transform=composed_transform)
    testset = loader_def(root=expanded_root, train=False, download=True,
                         transform=composed_transform)
    return trainset, testset
Example #15
Source File: CycleGAN.py From Deep-learning-with-cats with GNU General Public License v3.0 | 5 votes |
def __init__(self, *datasets):
    """Keep the wrapped datasets so they can be indexed in parallel."""
    self.datasets = datasets
Example #16
Source File: CycleGAN.py From Deep-learning-with-cats with GNU General Public License v3.0 | 5 votes |
def __len__(self):
    """The usable length is that of the shortest wrapped dataset."""
    lengths = [len(dataset) for dataset in self.datasets]
    return min(lengths)
Example #17
Source File: test_first_block.py From kinetics_i3d_pytorch with MIT License | 5 votes |
def test_input_block():
    """Check that the inflated 3D first block matches the 2D DenseNet block.

    Each frame of a temporally replicated 3D input must yield the same
    activations as the plain 2D forward pass.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = datasets.ImageFolder(
        '/sequoia/data1/yhasson/datasets/test-dataset',
        transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    densenet = torchvision.models.densenet121(pretrained=True)
    features = densenet.features
    # 2D reference pipeline: conv -> bn -> relu -> pool.
    seq2d = torch.nn.Sequential(
        features.conv0, features.norm0, features.relu0, features.pool0)
    # Same layers, inflated to 3D.
    seq3d = torch.nn.Sequential(
        inflate.inflate_conv(features.conv0, 3),
        inflate.inflate_batch_norm(features.norm0),
        features.relu0,
        inflate.inflate_pool(features.pool0, 1))
    loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=False)
    frame_nb = 4
    for i, (input_2d, target) in enumerate(loader):
        target = target.cuda()
        target_var = torch.autograd.Variable(target)
        input_2d_var = torch.autograd.Variable(input_2d)
        out2d = seq2d(input_2d_var)
        time_pad = torch.nn.ReplicationPad3d((0, 0, 0, 0, 1, 1))
        # Replicate each image along a new time axis, then pad temporally.
        input_3d = input_2d.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        input_3d_var = time_pad(input_3d)
        out3d = seq3d(input_3d_var)
        expected_out_3d = out2d.data.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        out_diff = expected_out_3d - out3d.data
        print(out_diff.max())
        assert out_diff.max() < 0.0001
Example #18
Source File: tiered_imagenet.py From MetaOptNet with Apache License 2.0 | 5 votes |
def __getitem__(self, index):
    """Fetch one (image, label) pair, applying self.transform if set."""
    raw, label = self.data[index], self.labels[index]
    # PIL conversion keeps this consistent with the other datasets.
    image = Image.fromarray(raw)
    if self.transform is not None:
        image = self.transform(image)
    return image, label
Example #19
Source File: CIFAR_FS.py From MetaOptNet with Apache License 2.0 | 5 votes |
def __getitem__(self, index):
    """Return the sample at *index* as a (transformed image, label) pair."""
    label = self.labels[index]
    # Wrap the raw array as a PIL Image for consistency with the other
    # datasets before applying transforms.
    img = Image.fromarray(self.data[index])
    if self.transform is not None:
        img = self.transform(img)
    return img, label
Example #20
Source File: mini_imagenet.py From MetaOptNet with Apache License 2.0 | 5 votes |
def __getitem__(self, index):
    """Fetch one (image, label) pair, applying self.transform if set."""
    raw, label = self.data[index], self.labels[index]
    # PIL conversion keeps this consistent with the other datasets.
    image = Image.fromarray(raw)
    if self.transform is not None:
        image = self.transform(image)
    return image, label
Example #21
Source File: FC100.py From MetaOptNet with Apache License 2.0 | 5 votes |
def __getitem__(self, index):
    """Return the sample at *index* as a (transformed image, label) pair."""
    label = self.labels[index]
    # Wrap the raw array as a PIL Image for consistency with the other
    # datasets before applying transforms.
    img = Image.fromarray(self.data[index])
    if self.transform is not None:
        img = self.transform(img)
    return img, label
Example #22
Source File: dataloader.py From FewShotWithoutForgetting with MIT License | 4 votes |
def __init__(self, phase='train', split='train', do_not_use_random_transf=False):
    """Load ImageNet for the low-shot benchmark.

    Args:
        phase (str): 'train', 'val' or 'test' — which benchmark phase.
        split (str): 'train' or 'val' — which ImageNet image split to read.
        do_not_use_random_transf (bool): force deterministic eval-style
            transforms even during the training phase.
    """
    self.phase = phase
    self.split = split
    assert(phase=='train' or phase=='test' or phase=='val')
    assert(split=='train' or split=='val')
    self.name = 'ImageNetLowShot_Phase_' + phase + '_Split_' + split
    print('Loading ImageNet dataset (for few-shot benchmark) - phase {0}'.
        format(phase))

    #***********************************************************************
    # Load the base/novel category splits of the benchmark.
    with open(_IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH, 'r') as f:
        label_idx = json.load(f)
    base_classes = label_idx['base_classes']
    novel_classes_val_phase = label_idx['novel_classes_1']
    novel_classes_test_phase = label_idx['novel_classes_2']
    #***********************************************************************

    transforms_list = []
    if (phase!='train') or (do_not_use_random_transf==True):
        # Fix: transforms.Resize / RandomResizedCrop replace the deprecated
        # transforms.Scale / RandomSizedCrop (removed in modern
        # torchvision); behavior is unchanged.
        transforms_list.append(transforms.Resize(256))
        transforms_list.append(transforms.CenterCrop(224))
    else:
        transforms_list.append(transforms.RandomResizedCrop(224))
        jitter_params = {'Brightness': 0.4, 'Contrast': 0.4, 'Color': 0.4}
        transforms_list.append(ImageJitter(jitter_params))
        transforms_list.append(transforms.RandomHorizontalFlip())
    transforms_list.append(lambda x: np.asarray(x))
    transforms_list.append(transforms.ToTensor())
    # Standard ImageNet channel statistics.
    mean_pix = [0.485, 0.456, 0.406]
    std_pix = [0.229, 0.224, 0.225]
    transforms_list.append(transforms.Normalize(mean=mean_pix, std=std_pix))
    self.transform = transforms.Compose(transforms_list)

    traindir = os.path.join(_IMAGENET_DATASET_DIR, 'train')
    valdir = os.path.join(_IMAGENET_DATASET_DIR, 'val')
    self.data = datasets.ImageFolder(
        traindir if split=='train' else valdir, self.transform)
    self.labels = [item[1] for item in self.data.imgs]
    self.label2ind = buildLabelIndex(self.labels)
    self.labelIds = sorted(self.label2ind.keys())
    self.num_cats = len(self.labelIds)
    assert(self.num_cats==1000)

    self.labelIds_base = base_classes
    self.num_cats_base = len(self.labelIds_base)
    if self.phase=='val' or self.phase=='test':
        self.labelIds_novel = (
            novel_classes_val_phase if (self.phase=='val')
            else novel_classes_test_phase)
        self.num_cats_novel = len(self.labelIds_novel)
        # Base and novel categories must be disjoint.
        intersection = set(self.labelIds_base) & set(self.labelIds_novel)
        assert(len(intersection) == 0)
Example #23
Source File: dataloader.py From FeatureDecoupling with MIT License | 4 votes |
def __init__(self, dataset_name, split, random_sized_crop=False):
    """Wrap ImageNet or Places205 with the standard augmentation pipeline.

    Args:
        dataset_name (str): 'imagenet' or 'places205' (case-insensitive).
        split (str): dataset split name, e.g. 'train' or 'val'.
        random_sized_crop (bool): use RandomResizedCrop for training
            instead of Resize/RandomCrop.
    """
    self.split = split.lower()
    self.dataset_name = dataset_name.lower()
    self.name = self.dataset_name + '_' + self.split
    self.random_sized_crop = random_sized_crop

    if self.dataset_name == 'imagenet':
        assert(self.split == 'train' or self.split == 'val')
        # Pick the augmentation stage; eval splits get deterministic crops.
        if self.split != 'train':
            transforms_list_augmentation = [transforms.Resize(256),
                                            transforms.CenterCrop(224)]
        elif self.random_sized_crop:
            transforms_list_augmentation = [transforms.RandomResizedCrop(224),
                                            transforms.RandomHorizontalFlip()]
        else:
            transforms_list_augmentation = [transforms.Resize(256),
                                            transforms.RandomCrop(224),
                                            transforms.RandomHorizontalFlip()]
        self.mean_pix = [0.485, 0.456, 0.406]
        self.std_pix = [0.229, 0.224, 0.225]
        transforms_list_normalize = [
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean_pix, std=self.std_pix)]
        self.transform_augmentation_normalize = transforms.Compose(
            transforms_list_augmentation + transforms_list_normalize)
        split_data_dir = env.IMAGENET_DIR + '/' + self.split
        self.data = datasets.ImageFolder(split_data_dir,
                                         self.transform_augmentation_normalize)
    elif self.dataset_name == 'places205':
        if self.split != 'train':
            transforms_list_augmentation = [transforms.CenterCrop(224)]
        elif self.random_sized_crop:
            transforms_list_augmentation = [transforms.RandomResizedCrop(224),
                                            transforms.RandomHorizontalFlip()]
        else:
            transforms_list_augmentation = [transforms.RandomCrop(224),
                                            transforms.RandomHorizontalFlip()]
        # ImageNet mean and var for ImageNet pretrained models.
        self.mean_pix = [0.485, 0.456, 0.406]
        self.std_pix = [0.229, 0.224, 0.225]
        transforms_list_normalize = [
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean_pix, std=self.std_pix)]
        self.transform_augmentation_normalize = transforms.Compose(
            transforms_list_augmentation + transforms_list_normalize)
        self.data = Places205(root=env.PLACES205_DIR, split=self.split,
                              transform=self.transform_augmentation_normalize)
    else:
        raise ValueError('Not recognized dataset {0}'.format(self.dataset_name))
Example #24
Source File: datasets.py From Adaptive-Regularization-Neural-Network with MIT License | 4 votes |
def get_Dataset(args):
    """Build train/test DataLoaders for MNIST / CIFAR10 / CIFAR100.

    Args:
        args: namespace with ``dataset``, ``batch_size``, ``trainPartial``
            and (when trainPartial is set) ``trainSize``.

    Returns:
        (train_loader, test_loader)

    Raises:
        ValueError: if ``args.dataset`` is not one of the supported names.
    """
    if args.dataset == 'MNIST':
        tr_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        te_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
    elif args.dataset == 'CIFAR10' or args.dataset == 'CIFAR100':
        tr_transform = transforms.Compose(
            [transforms.RandomCrop(32, padding=4),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
        te_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
    else:
        # Fix: an unsupported name previously fell through and crashed with
        # a NameError on tr_transform below; fail fast with a clear error.
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))

    trains = getattr(datasets, args.dataset)(root='./data', train=True,
                                             download=True, transform=tr_transform)
    tests = getattr(datasets, args.dataset)(root='./data', train=False,
                                            download=True, transform=te_transform)
    kwargs = {"num_workers": 1, "pin_memory": True} if torch.cuda.is_available() else {}

    if args.trainPartial:
        # Draw a random subset of the training data of size args.trainSize.
        #np.random.seed(seed = args.trainSize_seed)
        idx = np.arange(len(trains))  # len(trains) = 60000
        np.random.shuffle(idx)
        train_idx = idx[:args.trainSize]
        train_sampler = SubsetRandomSampler(train_idx)
        train_loader = torch.utils.data.DataLoader(trains, batch_size=args.batch_size,
                                                   sampler=train_sampler, **kwargs)
    else:
        train_loader = torch.utils.data.DataLoader(trains, batch_size=args.batch_size,
                                                   shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(tests, batch_size=args.batch_size,
                                              shuffle=True, **kwargs)
    return train_loader, test_loader
Example #25
Source File: dataset_analytics.py From DeepDIVA with GNU Lesser General Public License v3.0 | 4 votes |
def compute_mean_std_segmentation(dataset_folder, inmem, workers, filter_boundaries):
    """
    Computes mean and std of a dataset for semantic segmentation.
    Saves the results as CSV file in the dataset folder.

    Parameters
    ----------
    dataset_folder : String (path)
        Path to the dataset folder (see above for details)
    inmem : Boolean
        Specifies whether it should be computed in an online or offline fashion.
    workers : int
        Number of workers to use for the mean/std computation
    filter_boundaries : bool
        Specifies whether the boundary pixels should be removed or not

    Returns
    -------
    None
    """
    # Getting the train dir
    traindir = os.path.join(dataset_folder, 'train')

    # Load the dataset file names
    train_ds = datasets.ImageFolder(
        traindir, transform=transforms.Compose([transforms.ToTensor()]))

    # Split the entries into ground-truth vs data files by path convention.
    file_names_all = np.asarray([item[0] for item in train_ds.imgs])
    file_names_gt = np.asarray([f for f in file_names_all if '/gt/' in f])
    file_names_data = np.asarray([f for f in file_names_all if '/data/' in f])

    # Compute mean and std (only over the data files, not the labels).
    if inmem:
        mean, std = cms_inmem(file_names_data)
    else:
        mean, std = cms_online(file_names_data, workers)

    # Compute class frequencies weights
    class_frequencies_weights, class_ints = _get_class_frequencies_weights_segmentation(
        file_names_gt, filter_boundaries)
    # print(class_frequencies_weights)

    # Save results as CSV file in the dataset folder
    df = pd.DataFrame([mean, std, class_frequencies_weights, class_ints])
    df.index = ['mean[RGB]', 'std[RGB]',
                'class_frequencies_weights[num_classes]', 'class_encodings']
    df.to_csv(os.path.join(dataset_folder, 'analytics.csv'), header=False)

# Loads an image with OpenCV and returns the channel wise means of the image.
Example #26
Source File: data.py From xfer with Apache License 2.0 | 4 votes |
def generate_data(args, model_cfg):
    """Create train/test loaders for the configured torchvision dataset.

    STL10 gets special handling: its labels are remapped so class indices
    line up with CIFAR10's ordering.

    Returns (loaders_dict, num_classes, num_data).
    """
    print("Loading dataset {} from {}".format(args.dataset, args.data_path))
    dataset = getattr(torchvision.datasets, args.dataset)
    path = os.path.join(args.data_path, args.dataset.lower())

    if args.dataset == DatasetTypes.stl10:
        train_set = dataset(root=path, split="train", download=False,
                            transform=model_cfg.transform_train)
        num_classes = 10
        # this is a manual mapping of STL10 classes to CIFAR10 classes
        # CIFAR10 classes: {automobile, bird, cat, deer, dog, frog, horse, ship}
        # STL10 classes: {airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck}
        cls_mapping = np.array([0, 2, 1, 3, 4, 5, 7, 6, 8, 9])
        train_set.labels = cls_mapping[train_set.labels]
        test_set = dataset(root=path, split="test", download=False,
                           transform=model_cfg.transform_test)
        test_set.labels = cls_mapping[test_set.labels]
    else:
        train_set = dataset(root=path, train=True, download=False,
                            transform=model_cfg.transform_train)
        # zero indexing so the max target is one less than the number of classes
        num_classes = max(train_set.targets) + 1
        test_set = dataset(root=path, train=False, download=False,
                           transform=model_cfg.transform_test)

    def _make_loader(ds, shuffle):
        # Shared DataLoader settings for both splits.
        return torch.utils.data.DataLoader(
            ds, batch_size=args.batch_size, shuffle=shuffle,
            num_workers=args.num_workers, pin_memory=True)

    loaders = {
        "train": _make_loader(train_set, True),
        "test": _make_loader(test_set, False),
    }
    num_data = len(loaders["train"].dataset)
    print("Number of data points: ", num_data)
    return loaders, num_classes, num_data
Example #27
Source File: dataloader.py From DCP with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_cifar_dataloader(dataset, batch_size, n_threads=4, data_path='/home/dataset/', logger=None):
    """
    Get dataloader for cifar10/cifar100
    :param dataset: the name of the dataset ('cifar10' or 'cifar100')
    :param batch_size: how many samples per batch to load
    :param n_threads: how many subprocesses to use for data loading.
    :param data_path: the path of dataset
    :param logger: logger for logging (optional)
    :raises ValueError: if *dataset* is not 'cifar10' or 'cifar100'
    """
    # Fix: guard the optional logger (the default logger=None used to crash
    # on logger.info) and correct the 'datalaoder' typo in the log message.
    if logger is not None:
        logger.info("|===>Get dataloader for " + dataset)

    # Fix: validate the dataset name up front. Previously an unknown name
    # hit a NameError on norm_mean before reaching the assert at the end.
    if dataset == 'cifar10':
        norm_mean = [0.49139968, 0.48215827, 0.44653124]
        norm_std = [0.24703233, 0.24348505, 0.26158768]
    elif dataset == 'cifar100':
        norm_mean = [0.50705882, 0.48666667, 0.44078431]
        norm_std = [0.26745098, 0.25568627, 0.27607843]
    else:
        if logger is not None:
            logger.info("invalid data set")
        raise ValueError("invalid data set")

    data_root = os.path.join(data_path, 'cifar')
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)])
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)])

    if dataset == 'cifar10':
        train_dataset = datasets.CIFAR10(root=data_root, train=True,
                                         transform=train_transform, download=True)
        val_dataset = datasets.CIFAR10(root=data_root, train=False,
                                       transform=val_transform)
    else:
        train_dataset = datasets.CIFAR100(root=data_root, train=True,
                                          transform=train_transform, download=True)
        val_dataset = datasets.CIFAR100(root=data_root, train=False,
                                        transform=val_transform)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True, pin_memory=True,
                                               num_workers=n_threads)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False, pin_memory=True,
                                             num_workers=n_threads)
    return train_loader, val_loader
Example #28
Source File: dataloader.py From DCP with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_imagenet_dataloader(dataset, batch_size, n_threads=4, data_path='/home/dataset/', logger=None):
    """
    Get dataloader for imagenet
    :param dataset: the name of the dataset
    :param batch_size: how many samples per batch to load
    :param n_threads: how many subprocesses to use for data loading.
    :param data_path: the path of dataset
    :param logger: logger for logging (optional)
    """
    # Fix: guard the optional logger (the default logger=None used to crash
    # on logger.info) and correct the 'datalaoder' typo in the log message.
    if logger is not None:
        logger.info("|===>Get dataloader for " + dataset)
    dataset_path = os.path.join(data_path, dataset)
    traindir = os.path.join(dataset_path, "train")
    valdir = os.path.join(dataset_path, 'val')
    # Standard ImageNet channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=batch_size, shuffle=True,
        num_workers=n_threads, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=batch_size, shuffle=False,
        num_workers=n_threads, pin_memory=True)
    return train_loader, val_loader
Example #29
Source File: dataloader.py From DCP with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_sub_imagenet_dataloader(dataset, batch_size, num_samples_per_category, n_threads=4,
                                data_path='/home/dataset/', logger=None):
    """
    Get dataloader for a stratified subset of imagenet
    :param dataset: the name of the dataset
    :param batch_size: how many samples per batch to load
    :param num_samples_per_category: samples drawn per class for the train subset
    :param n_threads: how many subprocesses to use for data loading.
    :param data_path: the path of dataset
    :param logger: logger for logging (optional)
    """
    # Fix: guard the optional logger (the default logger=None used to crash
    # on logger.info) and correct the 'datalaoder' typo in the log message.
    if logger is not None:
        logger.info("|===>Get dataloader for " + dataset)
    dataset_path = os.path.join(data_path, "imagenet")
    traindir = os.path.join(dataset_path, "train")
    valdir = os.path.join(dataset_path, 'val')
    # Standard ImageNet channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    # Sample a fixed number of images per class for the training subset.
    stratified_categories_index = get_stratified_categories_index(train_dataset)
    stratified_sampler = StratifiedSampler(stratified_categories_index,
                                           num_samples_per_category)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=stratified_sampler,
        num_workers=n_threads, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=n_threads, pin_memory=True)
    return train_loader, val_loader
Example #30
Source File: dataset_analytics.py From DeepDIVA with GNU Lesser General Public License v3.0 | 4 votes |
def compute_mean_std(dataset_folder, inmem, workers):
    """
    Computes mean and std of a dataset.
    Saves the results as CSV file in the dataset folder.

    Parameters
    ----------
    dataset_folder : String (path)
        Path to the dataset folder (see above for details)
    inmem : Boolean
        Specifies whether it should be computed in an online or offline fashion.
    workers : int
        Number of workers to use for the mean/std computation

    Returns
    -------
    None
    """
    # Getting the train dir
    traindir = os.path.join(dataset_folder, 'train')

    # Sanity check on the training folder
    if not os.path.isdir(traindir):
        logging.warning("Train folder not found in the args.dataset_folder={}".format(dataset_folder))
        return

    # Load the dataset file names
    train_ds = datasets.ImageFolder(
        traindir, transform=transforms.Compose([transforms.ToTensor()]))

    # Extract the actual file names and labels as entries
    file_names = np.asarray([item[0] for item in train_ds.imgs])

    # Compute mean and std
    if inmem:
        mean, std = cms_inmem(file_names)
    else:
        mean, std = cms_online(file_names, workers)

    # Multi-label datasets ship a labels.csv; pick the matching class
    # frequency computation.
    labels_csv = os.path.join(traindir, 'labels.csv')
    if os.path.exists(labels_csv):
        class_frequencies_weights = _get_class_frequencies_weights_multilabel(labels_csv)
    else:
        class_frequencies_weights = _get_class_frequencies_weights(train_ds, workers)

    # Save results as CSV file in the dataset folder
    df = pd.DataFrame([mean, std, class_frequencies_weights])
    df.index = ['mean[RGB]', 'std[RGB]', 'class_frequencies_weights[num_classes]']
    df.to_csv(os.path.join(dataset_folder, 'analytics.csv'), header=False)