Python torchvision.datasets() Examples

The following are 30 code examples of the torchvision.datasets module. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torchvision, or try the search function.
Example #1
Source File: dataloader.py    From imagenet18_old with The Unlicense 7 votes vote down vote up
def get_loaders(traindir, valdir, sz, bs, fp16=True, val_bs=None, workers=8, rect_val=False, min_scale=0.08, distributed=False):
    """Build fp16-capable train and validation loaders over ImageFolder data.

    Args:
        traindir/valdir: ImageFolder-style directories for each split.
        sz: target crop size.
        bs: training batch size; val_bs falls back to bs when falsy.
        fp16: wrap batches in half precision via BatchTransformDataLoader.
        rect_val: use rectangular validation crops (see create_validation_set).
        min_scale: lower bound of the RandomResizedCrop scale range.
        distributed: enable DistributedSampler for multi-process training.

    Returns:
        (train_loader, val_loader, train_sampler, val_sampler)
    """
    effective_val_bs = val_bs or bs

    # Augmentation only; normalization happens later in BatchTransformDataLoader.
    augmentation = transforms.Compose([
        transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)),
        transforms.RandomHorizontalFlip()
    ])
    train_dataset = datasets.ImageFolder(traindir, augmentation)

    if distributed:
        train_sampler = DistributedSampler(train_dataset, num_replicas=env_world_size(), rank=env_rank())
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=bs,
        shuffle=train_sampler is None,
        num_workers=workers,
        pin_memory=True,
        collate_fn=fast_collate,
        sampler=train_sampler)

    val_dataset, val_sampler = create_validation_set(
        valdir, effective_val_bs, sz, rect_val=rect_val, distributed=distributed)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        num_workers=workers,
        pin_memory=True,
        collate_fn=fast_collate,
        batch_sampler=val_sampler)

    return (BatchTransformDataLoader(train_loader, fp16=fp16),
            BatchTransformDataLoader(val_loader, fp16=fp16),
            train_sampler,
            val_sampler)
Example #2
Source File: cifarloader.py    From DTC with MIT License 6 votes vote down vote up
def __getitem__(self, index):
        """Fetch one sample.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target, index) where target is the class index.
        """
        img = self.data[index]
        target = self.targets[index]

        # Wrap the raw array as a PIL Image so the transform pipeline is
        # consistent with the other datasets in this project.
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target, index
Example #3
Source File: train_model.py    From super-events-cvpr18 with MIT License 6 votes vote down vote up
def load_data(train_split, val_split, root):
    """Build Dataset/DataLoader pairs for the train and validation splits.

    An empty train_split yields None placeholders for the training side.
    Each loader gets a `.root` attribute pointing at the data root.
    """
    if train_split:
        dataset = Dataset(train_split, 'training', root, batch_size)
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=True,
            num_workers=8, pin_memory=True, collate_fn=collate_fn)
        dataloader.root = root
    else:
        dataset, dataloader = None, None

    val_dataset = Dataset(val_split, 'testing', root, batch_size)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=True,
        num_workers=2, pin_memory=True, collate_fn=collate_fn)
    val_dataloader.root = root

    return ({'train': dataloader, 'val': val_dataloader},
            {'train': dataset, 'val': val_dataset})


# train the model


# train the model 
Example #4
Source File: svhnloader.py    From DTC with MIT License 6 votes vote down vote up
def __getitem__(self, index):
        """Fetch one sample.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target, index) where target is the class index.
        """
        img = self.data[index]
        target = int(self.labels[index])

        # SVHN stores images channel-first; transpose to HWC and wrap as a
        # PIL Image for consistency with all other datasets.
        img = Image.fromarray(np.transpose(img, (1, 2, 0)))

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target, index
Example #5
Source File: dataloader.py    From FewShotWithoutForgetting with MIT License 6 votes vote down vote up
def __init__(self, split='train'):
        """Load the ImageNet train or val split with the standard eval pipeline.

        Args:
            split (str): 'train' or 'val'; selects the ImageFolder directory
                under _IMAGENET_DATASET_DIR.
        """
        self.split = split
        assert(split=='train' or split=='val')
        self.name = 'ImageNet_Split_' + split

        print('Loading ImageNet dataset - split {0}'.format(split))
        transforms_list = []
        # transforms.Scale is deprecated and removed in modern torchvision;
        # transforms.Resize is the drop-in replacement with identical behavior.
        transforms_list.append(transforms.Resize(256))
        transforms_list.append(transforms.CenterCrop(224))
        transforms_list.append(lambda x: np.asarray(x))
        transforms_list.append(transforms.ToTensor())
        # Standard ImageNet channel statistics.
        mean_pix = [0.485, 0.456, 0.406]
        std_pix = [0.229, 0.224, 0.225]
        transforms_list.append(transforms.Normalize(mean=mean_pix, std=std_pix))
        self.transform = transforms.Compose(transforms_list)

        traindir = os.path.join(_IMAGENET_DATASET_DIR, 'train')
        valdir = os.path.join(_IMAGENET_DATASET_DIR, 'val')
        self.data = datasets.ImageFolder(
            traindir if split=='train' else valdir, self.transform)
        # ImageFolder.imgs holds (path, class_index) pairs.
        self.labels = [item[1] for item in self.data.imgs]
Example #6
Source File: CycleGAN.py    From Deep-learning-with-cats with GNU General Public License v3.0 5 votes vote down vote up
def __getitem__(self, i):
        return tuple(d[i] for d in self.datasets) 
Example #7
Source File: create_image_dataloader.py    From Auto-PyTorch with Apache License 2.0 5 votes vote down vote up
def get_pipeline_config_options(self):
        """Declare the config options consumed by this pipeline node."""
        download_dir = ConfigOption(
            "default_dataset_download_dir",
            default=ConfigFileParser.get_autonet_home(),
            type='directory',
            info="Directory default datasets will be downloaded to.")
        worker_count = ConfigOption("dataloader_worker", default=1, type=int)
        cache_size = ConfigOption("dataloader_cache_size_mb", default=0, type=int)
        return [download_dir, worker_count, cache_size]
Example #8
Source File: data.py    From dnn-mode-connectivity with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def loaders(dataset, path, batch_size, num_workers, transform_name, use_test=False,
            shuffle_train=True):
    # Resolve the dataset class (e.g. CIFAR10) and its named transform pair
    # (transform.train / transform.test) by attribute lookup.
    ds = getattr(torchvision.datasets, dataset)
    path = os.path.join(path, dataset.lower())
    transform = getattr(getattr(Transforms, dataset), transform_name)
    train_set = ds(path, train=True, download=True, transform=transform.train)

    if use_test:
        print('You are going to run models on the test set. Are you sure?')
        test_set = ds(path, train=False, download=True, transform=transform.test)
    else:
        print("Using train (45000) + validation (5000)")
        # Carve the last 5000 training samples out as a validation set by
        # mutating the dataset's internal arrays in place.
        # NOTE(review): train_data/train_labels are pre-0.4 torchvision
        # attribute names (renamed data/targets later) — confirm the pinned
        # torchvision version before upgrading.
        train_set.train_data = train_set.train_data[:-5000]
        train_set.train_labels = train_set.train_labels[:-5000]

        # Re-load the train split and disguise its last 5000 samples as the
        # "test" set, mimicking the real test set's attribute layout.
        test_set = ds(path, train=True, download=True, transform=transform.test)
        test_set.train = False
        test_set.test_data = test_set.train_data[-5000:]
        test_set.test_labels = test_set.train_labels[-5000:]
        delattr(test_set, 'train_data')
        delattr(test_set, 'train_labels')

    # Second return value is the number of classes (labels are 0-based).
    return {
               'train': torch.utils.data.DataLoader(
                   train_set,
                   batch_size=batch_size,
                   shuffle=shuffle_train,
                   num_workers=num_workers,
                   pin_memory=True
               ),
               'test': torch.utils.data.DataLoader(
                   test_set,
                   batch_size=batch_size,
                   shuffle=False,
                   num_workers=num_workers,
                   pin_memory=True
               ),
           }, max(train_set.train_labels) + 1 
Example #9
Source File: main.py    From binary-wide-resnet with MIT License 5 votes vote down vote up
def create_dataset(args, train):
    """Instantiate the torchvision dataset named by args.dataset.

    The base pipeline normalizes with CIFAR channel statistics; the training
    split additionally gets reflect-pad, flip, and random-crop augmentation.
    """
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(np.array([125.3, 123.0, 113.9]) / 255.0,
                    np.array([63.0, 62.1, 66.7]) / 255.0),
    ])
    if train:
        # Prepend augmentation in front of the normalization pipeline.
        transform = T.Compose([
            T.Pad(4, padding_mode='reflect'),
            T.RandomHorizontalFlip(),
            T.RandomCrop(32),
            transform
        ])
    dataset_cls = getattr(datasets, args.dataset)
    return dataset_cls(args.dataroot, train=train, download=True, transform=transform)
Example #10
Source File: loaders.py    From WatermarkNN with MIT License 5 votes vote down vote up
def getdataloader(datatype, train_db_path, test_db_path, batch_size):
    """Build train/test loaders for CIFAR10 or CIFAR100.

    Returns (trainloader, testloader, n_classes), or (None, None, None) when
    datatype is not supported.
    """
    # get transformations
    transform_train, transform_test = _getdatatransformsdb(datatype=datatype)

    key = datatype.lower()
    if key == CIFAR10:
        print("Using CIFAR10 dataset.")
        dataset_cls, n_classes = torchvision.datasets.CIFAR10, 10
    elif key == CIFAR100:
        print("Using CIFAR100 dataset.")
        dataset_cls, n_classes = torchvision.datasets.CIFAR100, 100
    else:
        print("Dataset is not supported.")
        return None, None, None

    trainset = dataset_cls(root=train_db_path, train=True, download=True,
                           transform=transform_train)
    testset = dataset_cls(root=test_db_path, train=False, download=True,
                          transform=transform_test)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=4)
    return trainloader, testloader, n_classes
Example #11
Source File: dataloader.py    From FewShotWithoutForgetting with MIT License 5 votes vote down vote up
def __getitem__(self, index):
        """Return (transformed image, label) for the sample at *index*."""
        label = self.labels[index]
        # Convert the raw array to a PIL Image for consistency with all
        # other datasets in this project.
        img = Image.fromarray(self.data[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, label
Example #12
Source File: dataloader.py    From imagenet18_old with The Unlicense 5 votes vote down vote up
def sort_ar(valdir):
    """Return (index, aspect_ratio) pairs for the validation images, sorted by ratio.

    The result is cached in a pickle next to *valdir* so the slow image scan
    only happens once.
    """
    idx2ar_file = valdir + '/../sorted_idxar.p'
    if os.path.isfile(idx2ar_file):
        # was: pickle.load(open(...)) — leaked the file handle.
        # NOTE(review): unpickling trusts the cache file; fine for a local
        # cache, unsafe if the path is ever attacker-writable.
        with open(idx2ar_file, 'rb') as f:
            return pickle.load(f)
    print('Creating AR indexes. Please be patient this may take a couple minutes...')
    val_dataset = datasets.ImageFolder(valdir) # AS: TODO: use Image.open instead of looping through dataset
    sizes = [img[0].size for img in tqdm(val_dataset, total=len(val_dataset))]
    # Aspect ratio = width / height, rounded for a stable sort key.
    idx_ar = [(i, round(s[0]/s[1], 5)) for i, s in enumerate(sizes)]
    sorted_idxar = sorted(idx_ar, key=lambda x: x[1])
    # was: pickle.dump(..., open(..., 'wb')) — leaked the file handle.
    with open(idx2ar_file, 'wb') as f:
        pickle.dump(sorted_idxar, f)
    print('Done')
    return sorted_idxar
Example #13
Source File: dataloader.py    From imagenet18_old with The Unlicense 5 votes vote down vote up
def create_validation_set(valdir, batch_size, target_size, rect_val, distributed):
    """Build the validation dataset and batch sampler.

    With rect_val=True, images are sorted by aspect ratio and cropped
    rectangularly so each batch shares a shape; otherwise a standard
    resize + center-crop ImageFolder is used.
    """
    resize = transforms.Resize(int(target_size * 1.14))

    if rect_val:
        # Group images of similar aspect ratio into the same batches.
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, batch_size)

        val_dataset = ValDataset(valdir, transform=[resize, CropArTfm(idx2ar, target_size)])
        val_sampler = DistValSampler(idx_sorted, batch_size=batch_size, distributed=distributed)
        return val_dataset, val_sampler

    val_dataset = datasets.ImageFolder(
        valdir, transforms.Compose([resize, transforms.CenterCrop(target_size)]))
    val_sampler = DistValSampler(
        list(range(len(val_dataset))), batch_size=batch_size, distributed=distributed)
    return val_dataset, val_sampler
Example #14
Source File: toy.py    From Hydra with MIT License 5 votes vote down vote up
def toy(dataset,
        root='~/data/torchvision/',
        transforms=None):
    """Load a train and test dataset pair from torchvision.datasets.

    Args:
        dataset: Name of a class in torchvision.datasets (e.g. 'MNIST').
        root: Download/cache directory; '~' is expanded.
        transforms: Optional list of {'def': name, 'kwargs': {...}} specs
            resolved against torchvision.transforms. ToTensor is always
            appended last.

    Returns:
        (trainset, testset) tuple.

    Raises:
        ValueError: If the dataset or a transform name is unknown.
    """
    if not hasattr(torchvision.datasets, dataset):
        # was: bare `raise ValueError` — no hint about what was wrong.
        raise ValueError('Unknown torchvision dataset: {!r}'.format(dataset))
    loader_def = getattr(torchvision.datasets, dataset)

    transform_funcs = []
    if transforms is not None:
        for transform in transforms:
            if not hasattr(torchvision.transforms, transform['def']):
                raise ValueError(
                    'Unknown torchvision transform: {!r}'.format(transform['def']))
            transform_def = getattr(torchvision.transforms, transform['def'])
            transform_funcs.append(transform_def(**transform['kwargs']))
    transform_funcs.append(torchvision.transforms.ToTensor())

    composed_transform = torchvision.transforms.Compose(transform_funcs)
    trainset = loader_def(
            root=os.path.expanduser(root), train=True,
            download=True, transform=composed_transform)
    testset = loader_def(
            root=os.path.expanduser(root), train=False,
            download=True, transform=composed_transform)
    return trainset, testset
Example #15
Source File: CycleGAN.py    From Deep-learning-with-cats with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, *datasets):
        self.datasets = datasets 
Example #16
Source File: CycleGAN.py    From Deep-learning-with-cats with GNU General Public License v3.0 5 votes vote down vote up
def __len__(self):
        return min(len(d) for d in self.datasets) 
Example #17
Source File: test_first_block.py    From kinetics_i3d_pytorch with MIT License 5 votes vote down vote up
def test_input_block():
    """Sanity-check that the inflated 3D DenseNet stem reproduces the 2D stem
    on temporally-replicated input.

    Requires CUDA and a hard-coded local image folder; intended as a manual
    integration test, not an automated one.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # NOTE(review): machine-specific dataset path — only runs where it exists.
    dataset = datasets.ImageFolder('/sequoia/data1/yhasson/datasets/test-dataset',
            transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    densenet = torchvision.models.densenet121(pretrained=True)
    features = densenet.features
    # 2D reference: the pretrained conv/bn/relu/pool stem as-is.
    seq2d = torch.nn.Sequential(
        features.conv0, features.norm0, features.relu0, features.pool0)
    # 3D candidate: the same stem with conv/bn/pool inflated along time.
    seq3d = torch.nn.Sequential(
        inflate.inflate_conv(features.conv0, 3),
        inflate.inflate_batch_norm(features.norm0),
        features.relu0,
        inflate.inflate_pool(features.pool0, 1))

    loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=False)
    frame_nb = 4
    for i, (input_2d, target) in enumerate(loader):
        target = target.cuda()
        target_var = torch.autograd.Variable(target)
        input_2d_var = torch.autograd.Variable(input_2d)
        out2d = seq2d(input_2d_var)
        time_pad = torch.nn.ReplicationPad3d((0, 0, 0, 0, 1, 1))
        # Repeat each frame along a new time axis so both stems see identical content.
        input_3d = input_2d.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        input_3d_var = time_pad(input_3d) 
        out3d = seq3d(input_3d_var)
        # The 2D activations replicated over time are the expected 3D output.
        expected_out_3d = out2d.data.unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
        out_diff = expected_out_3d - out3d.data
        print(out_diff.max())
        # Allow only numeric noise between the two stems.
        assert(out_diff.max() < 0.0001) 
Example #18
Source File: tiered_imagenet.py    From MetaOptNet with Apache License 2.0 5 votes vote down vote up
def __getitem__(self, index):
        """Return (transformed image, label) for the sample at *index*."""
        label = self.labels[index]
        # Convert the raw array to a PIL Image for consistency with all
        # other datasets in this project.
        img = Image.fromarray(self.data[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, label
Example #19
Source File: CIFAR_FS.py    From MetaOptNet with Apache License 2.0 5 votes vote down vote up
def __getitem__(self, index):
        """Return (transformed image, label) for the sample at *index*."""
        label = self.labels[index]
        # Convert the raw array to a PIL Image for consistency with all
        # other datasets in this project.
        img = Image.fromarray(self.data[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, label
Example #20
Source File: mini_imagenet.py    From MetaOptNet with Apache License 2.0 5 votes vote down vote up
def __getitem__(self, index):
        """Return (transformed image, label) for the sample at *index*."""
        label = self.labels[index]
        # Convert the raw array to a PIL Image for consistency with all
        # other datasets in this project.
        img = Image.fromarray(self.data[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, label
Example #21
Source File: FC100.py    From MetaOptNet with Apache License 2.0 5 votes vote down vote up
def __getitem__(self, index):
        """Return (transformed image, label) for the sample at *index*."""
        label = self.labels[index]
        # Convert the raw array to a PIL Image for consistency with all
        # other datasets in this project.
        img = Image.fromarray(self.data[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, label
Example #22
Source File: dataloader.py    From FewShotWithoutForgetting with MIT License 4 votes vote down vote up
def __init__(self,
                 phase='train',
                 split='train',
                 do_not_use_random_transf=False):
        """Load the ImageNet low-shot benchmark split.

        Args:
            phase (str): 'train', 'val' or 'test'; selects which novel-class
                set (if any) is exposed alongside the base classes.
            split (str): 'train' or 'val'; selects the ImageFolder directory.
            do_not_use_random_transf (bool): force the deterministic eval
                pipeline even in the train phase.
        """
        self.phase = phase
        self.split = split
        assert(phase=='train' or phase=='test' or phase=='val')
        assert(split=='train' or split=='val')
        self.name = 'ImageNetLowShot_Phase_' + phase + '_Split_' + split

        print('Loading ImageNet dataset (for few-shot benchmark) - phase {0}'.
            format(phase))

        # Category splits: base classes plus two disjoint novel-class sets
        # (one used in the val phase, one in the test phase).
        with open(_IMAGENET_LOWSHOT_BENCHMARK_CATEGORY_SPLITS_PATH, 'r') as f:
            label_idx = json.load(f)
        base_classes = label_idx['base_classes']
        novel_classes_val_phase = label_idx['novel_classes_1']
        novel_classes_test_phase = label_idx['novel_classes_2']

        transforms_list = []
        if (phase!='train') or (do_not_use_random_transf==True):
            # Deterministic eval pipeline. transforms.Scale is deprecated and
            # removed in modern torchvision; Resize is the drop-in replacement.
            transforms_list.append(transforms.Resize(256))
            transforms_list.append(transforms.CenterCrop(224))
        else:
            # RandomSizedCrop was renamed RandomResizedCrop in torchvision.
            transforms_list.append(transforms.RandomResizedCrop(224))
            jitter_params = {'Brightness': 0.4, 'Contrast': 0.4, 'Color': 0.4}
            transforms_list.append(ImageJitter(jitter_params))
            transforms_list.append(transforms.RandomHorizontalFlip())

        transforms_list.append(lambda x: np.asarray(x))
        transforms_list.append(transforms.ToTensor())
        # Standard ImageNet channel statistics.
        mean_pix = [0.485, 0.456, 0.406]
        std_pix = [0.229, 0.224, 0.225]
        transforms_list.append(transforms.Normalize(mean=mean_pix, std=std_pix))

        self.transform = transforms.Compose(transforms_list)

        traindir = os.path.join(_IMAGENET_DATASET_DIR, 'train')
        valdir = os.path.join(_IMAGENET_DATASET_DIR, 'val')
        self.data = datasets.ImageFolder(
            traindir if split=='train' else valdir, self.transform)
        self.labels = [item[1] for item in self.data.imgs]

        self.label2ind = buildLabelIndex(self.labels)
        self.labelIds = sorted(self.label2ind.keys())
        self.num_cats = len(self.labelIds)
        assert(self.num_cats==1000)

        self.labelIds_base = base_classes
        self.num_cats_base = len(self.labelIds_base)
        if self.phase=='val' or self.phase=='test':
            self.labelIds_novel = (
                novel_classes_val_phase if (self.phase=='val') else
                novel_classes_test_phase)
            self.num_cats_novel = len(self.labelIds_novel)

            # Base and novel categories must not overlap.
            intersection = set(self.labelIds_base) & set(self.labelIds_novel)
            assert(len(intersection) == 0)
Example #23
Source File: dataloader.py    From FeatureDecoupling with MIT License 4 votes vote down vote up
def __init__(self, dataset_name, split, random_sized_crop=False):
        """Wrap an ImageNet or Places205 split with the standard augmentation
        and normalization pipeline.

        Args:
            dataset_name (str): 'imagenet' or 'places205' (case-insensitive).
            split (str): dataset split name, e.g. 'train' or 'val'.
            random_sized_crop (bool): if True, train-time cropping uses
                RandomResizedCrop instead of Resize/RandomCrop.

        Raises:
            ValueError: if dataset_name is not recognized.
        """
        self.split = split.lower()
        self.dataset_name =  dataset_name.lower()
        self.name = self.dataset_name + '_' + self.split
        self.random_sized_crop = random_sized_crop

        if self.dataset_name=='imagenet':
            assert(self.split=='train' or self.split=='val')

            if self.split!='train':
                # Deterministic eval pipeline: resize then center-crop.
                transforms_list_augmentation = [transforms.Resize(256),
                                                transforms.CenterCrop(224)]
            else:
                if self.random_sized_crop:
                    transforms_list_augmentation = [transforms.RandomResizedCrop(224),
                                                    transforms.RandomHorizontalFlip()]
                else:
                    transforms_list_augmentation = [transforms.Resize(256),
                                                    transforms.RandomCrop(224),
                                                    transforms.RandomHorizontalFlip()]

            # Standard ImageNet channel statistics.
            self.mean_pix = [0.485, 0.456, 0.406]
            self.std_pix = [0.229, 0.224, 0.225]
            transforms_list_normalize = [transforms.ToTensor(),
                                         transforms.Normalize(mean=self.mean_pix, std=self.std_pix)]

            self.transform_augmentation_normalize = transforms.Compose(transforms_list_augmentation+transforms_list_normalize)
            split_data_dir = env.IMAGENET_DIR + '/' + self.split
            self.data = datasets.ImageFolder(split_data_dir, self.transform_augmentation_normalize)

        elif self.dataset_name=='places205':
            if self.split!='train':
                # NOTE(review): unlike the imagenet branch there is no
                # Resize(256) before the center crop — confirm intentional.
                transforms_list_augmentation = [transforms.CenterCrop(224)]
            else:
                if self.random_sized_crop:
                    transforms_list_augmentation = [transforms.RandomResizedCrop(224),
                                                    transforms.RandomHorizontalFlip()]
                else:
                    transforms_list_augmentation = [transforms.RandomCrop(224),
                                                    transforms.RandomHorizontalFlip()]

            # ImageNet mean and var for ImageNet pretrained models.
            self.mean_pix = [0.485, 0.456, 0.406]
            self.std_pix = [0.229, 0.224, 0.225]
            transforms_list_normalize = [transforms.ToTensor(),
                                         transforms.Normalize(mean=self.mean_pix, std=self.std_pix)]

            self.transform_augmentation_normalize = transforms.Compose(transforms_list_augmentation+transforms_list_normalize)
            self.data = Places205(root=env.PLACES205_DIR, split=self.split, transform=self.transform_augmentation_normalize)

        else:
            raise ValueError('Not recognized dataset {0}'.format(self.dataset_name)) 
Example #24
Source File: datasets.py    From Adaptive-Regularization-Neural-Network with MIT License 4 votes vote down vote up
def get_Dataset(args):
    """Build train/test DataLoaders for MNIST, CIFAR10 or CIFAR100.

    Args:
        args: Namespace with dataset, batch_size, trainPartial and trainSize.

    Returns:
        (train_loader, test_loader) tuple.

    Raises:
        ValueError: if args.dataset is not a supported name. (Previously an
            unsupported name fell through and crashed with a NameError on
            tr_transform.)
    """
    if args.dataset == 'MNIST':
        tr_transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))
                            ])
        te_transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))
                            ])
    elif args.dataset == 'CIFAR10' or args.dataset == 'CIFAR100':
        tr_transform = transforms.Compose(
            [transforms.RandomCrop(32, padding=4),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

        te_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
    else:
        raise ValueError('Unsupported dataset: {!r}'.format(args.dataset))

    trains = getattr(datasets, args.dataset)(root='./data', train=True,
                                        download=True, transform=tr_transform)

    tests = getattr(datasets, args.dataset)(root='./data', train=False,
                                       download=True, transform=te_transform)

    kwargs = {"num_workers": 1, "pin_memory": True} if torch.cuda.is_available() else {}

    if args.trainPartial:
        # Sub-sample a fixed-size random subset of the training set.
        #np.random.seed(seed = args.trainSize_seed)
        idx = np.arange(len(trains)) # len(trains) = 60000 for MNIST
        np.random.shuffle(idx)
        train_idx = idx[:args.trainSize]
        train_sampler = SubsetRandomSampler(train_idx)
        train_loader = torch.utils.data.DataLoader(trains, batch_size=args.batch_size, sampler=train_sampler, **kwargs)
    else:
        train_loader = torch.utils.data.DataLoader(trains, batch_size=args.batch_size, shuffle=True, **kwargs)

    test_loader = torch.utils.data.DataLoader(tests, batch_size=args.batch_size, shuffle=True, **kwargs)

    return train_loader, test_loader
Example #25
Source File: dataset_analytics.py    From DeepDIVA with GNU Lesser General Public License v3.0 4 votes vote down vote up
def compute_mean_std_segmentation(dataset_folder, inmem, workers, filter_boundaries):
    """
    Computes mean and std of a dataset for semantic segmentation. Saves the results as CSV file in the dataset folder.

    Parameters
    ----------
    dataset_folder : String (path)
        Path to the dataset folder (see above for details)
    inmem : Boolean
        Specifies whether it should be computed in an online or offline fashion.
    workers : int
        Number of workers to use for the mean/std computation
    filter_boundaries : bool
        specifies whether the boundary pixels should be removed or not
    Returns
    -------
        None
    """

    # Getting the train dir
    traindir = os.path.join(dataset_folder, 'train')

    # Load the dataset file names
    train_ds = datasets.ImageFolder(traindir, transform=transforms.Compose([transforms.ToTensor()]))

    # Extract the actual file names and labels as entries; ground-truth masks
    # live under /gt/, input images under /data/.
    file_names_all = np.asarray([item[0] for item in train_ds.imgs])
    file_names_gt = np.asarray([f for f in file_names_all if '/gt/' in f])
    file_names_data = np.asarray([f for f in file_names_all if '/data/' in f])

    # Compute mean and std (in-memory or streamed across workers)
    if inmem:
        mean, std = cms_inmem(file_names_data)
    else:
        mean, std = cms_online(file_names_data, workers)

    # Compute class frequencies weights
    class_frequencies_weights, class_ints = _get_class_frequencies_weights_segmentation(file_names_gt, filter_boundaries)
    # print(class_frequencies_weights)
    # Save results as CSV file in the dataset folder
    df = pd.DataFrame([mean, std, class_frequencies_weights, class_ints])
    df.index = ['mean[RGB]', 'std[RGB]', 'class_frequencies_weights[num_classes]', 'class_encodings']
    df.to_csv(os.path.join(dataset_folder, 'analytics.csv'), header=False)


# Loads an image with OpenCV and returns the channel wise means of the image. 
Example #26
Source File: data.py    From xfer with Apache License 2.0 4 votes vote down vote up
def generate_data(args, model_cfg):
    """Load the torchvision dataset named by args.dataset and wrap it in loaders.

    For STL10, labels are remapped onto CIFAR10 class indices so models
    trained on one can be evaluated on the other.

    Returns:
        (loaders, num_classes, num_data): dict with 'train'/'test'
        DataLoaders, the class count, and the training-set size.
    """
    print("Loading dataset {} from {}".format(args.dataset, args.data_path))
    dataset = getattr(torchvision.datasets, args.dataset)
    path = os.path.join(args.data_path, args.dataset.lower())

    if args.dataset == DatasetTypes.stl10:
        # STL10 uses split=... instead of train=... and stores labels, not targets.
        train_set = dataset(
            root=path, split="train", download=False, transform=model_cfg.transform_train
        )
        num_classes = 10
        # this is a manual mapping of STL10 classes to CIFAR10 classes
        # CIFAR10 classes: {automobile, bird, cat, deer, dog, frog, horse, ship}
        # STL10 classes: {airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck}
        cls_mapping = np.array([0, 2, 1, 3, 4, 5, 7, 6, 8, 9])
        train_set.labels = cls_mapping[train_set.labels]

        test_set = dataset(
            root=path, split="test", download=False, transform=model_cfg.transform_test
        )
        test_set.labels = cls_mapping[test_set.labels]
    else:
        train_set = dataset(
            root=path, train=True, download=False, transform=model_cfg.transform_train
        )
        # zero indexing so the max target is one less than the number of classes
        num_classes = max(train_set.targets) + 1

        test_set = dataset(
            root=path, train=False, download=False, transform=model_cfg.transform_test
        )

    loaders = {
        "train": torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            pin_memory=True,
        ),
        "test": torch.utils.data.DataLoader(
            test_set,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            pin_memory=True,
        ),
    }
    num_data = len(loaders["train"].dataset)
    print("Number of data points: ", num_data)
    return loaders, num_classes, num_data
Example #27
Source File: dataloader.py    From DCP with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_cifar_dataloader(dataset, batch_size, n_threads=4, data_path='/home/dataset/', logger=None):
    """
    Get dataloader for cifar10/cifar100
    :param dataset: the name of the dataset ('cifar10' or 'cifar100')
    :param batch_size: how many samples per batch to load
    :param n_threads:  how many subprocesses to use for data loading.
    :param data_path: the path of dataset
    :param logger: logger for logging
    """

    logger.info("|===>Get datalaoder for " + dataset)

    # Validate the dataset name up front: previously an unknown name crashed
    # with a NameError on norm_mean before ever reaching the intended
    # "invalid data set" branch at the bottom of the original function.
    if dataset == 'cifar10':
        norm_mean = [0.49139968, 0.48215827, 0.44653124]
        norm_std = [0.24703233, 0.24348505, 0.26158768]
    elif dataset == 'cifar100':
        norm_mean = [0.50705882, 0.48666667, 0.44078431]
        norm_std = [0.26745098, 0.25568627, 0.27607843]
    else:
        logger.info("invalid data set")
        assert False, "invalid data set"
    data_root = os.path.join(data_path, 'cifar')

    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)])
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)])

    # Dataset name is already validated above, so the class choice is binary.
    dataset_cls = datasets.CIFAR10 if dataset == 'cifar10' else datasets.CIFAR100
    train_dataset = dataset_cls(root=data_root,
                                train=True,
                                transform=train_transform,
                                download=True)
    val_dataset = dataset_cls(root=data_root,
                              train=False,
                              transform=val_transform)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=n_threads)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=n_threads)
    return train_loader, val_loader
Example #28
Source File: dataloader.py    From DCP with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_imagenet_dataloader(dataset, batch_size, n_threads=4, data_path='/home/dataset/', logger=None):
    """
    Get train and validation dataloaders for ImageNet.

    :param dataset: the name of the dataset (sub-folder of ``data_path``)
    :param batch_size: how many samples per batch to load
    :param n_threads: how many subprocesses to use for data loading
    :param data_path: the path of the dataset root
    :param logger: logger for logging; may be ``None`` (logging is skipped)
    :return: ``(train_loader, val_loader)``
    """

    # Guard the default logger=None: the original called logger.info()
    # unconditionally, which raised AttributeError when no logger was passed.
    if logger is not None:
        logger.info("|===>Get dataloader for " + dataset)

    dataset_path = os.path.join(data_path, dataset)
    traindir = os.path.join(dataset_path, "train")
    valdir = os.path.join(dataset_path, "val")
    # Standard ImageNet per-channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training pipeline: random crop + flip augmentation, shuffled each epoch.
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_threads,
        pin_memory=True)

    # Validation pipeline: deterministic resize + center crop, no shuffling.
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=batch_size,
        shuffle=False,
        num_workers=n_threads,
        pin_memory=True)
    return train_loader, val_loader
Example #29
Source File: dataloader.py    From DCP with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_sub_imagenet_dataloader(dataset, batch_size, num_samples_per_category,
                                n_threads=4, data_path='/home/dataset/', logger=None):
    """
    Get dataloaders for a stratified subset of ImageNet.

    :param dataset: the name of the dataset (used for logging only; data is
        always read from ``<data_path>/imagenet``)
    :param batch_size: how many samples per batch to load
    :param num_samples_per_category: number of training samples drawn per class
        by the stratified sampler
    :param n_threads: how many subprocesses to use for data loading
    :param data_path: the path of the dataset root
    :param logger: logger for logging; may be ``None`` (logging is skipped)
    :return: ``(train_loader, val_loader)``
    """

    # Guard the default logger=None: the original called logger.info()
    # unconditionally, which raised AttributeError when no logger was passed.
    if logger is not None:
        logger.info("|===>Get dataloader for " + dataset)

    dataset_path = os.path.join(data_path, "imagenet")
    traindir = os.path.join(dataset_path, "train")
    valdir = os.path.join(dataset_path, "val")
    # Standard ImageNet per-channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))
    # Build a per-class index, then sample num_samples_per_category from each
    # class so the subset keeps the original class balance.
    stratified_categories_index = get_stratified_categories_index(train_dataset)
    stratified_sampler = StratifiedSampler(stratified_categories_index, num_samples_per_category)

    # Note: shuffle must stay unset here — DataLoader forbids combining
    # shuffle=True with a custom sampler.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=stratified_sampler,
        num_workers=n_threads,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=n_threads,
        pin_memory=True)
    return train_loader, val_loader
Example #30
Source File: dataset_analytics.py    From DeepDIVA with GNU Lesser General Public License v3.0 4 votes vote down vote up
def compute_mean_std(dataset_folder, inmem, workers):
    """
    Compute the mean and std of a dataset and save them, together with the
    class-frequency weights, as a CSV file in the dataset folder.

    Parameters
    ----------
    dataset_folder : String (path)
        Path to the dataset folder (see above for details)
    inmem : Boolean
        Specifies whether the computation should run in an online or offline fashion.
    workers : int
        Number of workers to use for the mean/std computation

    Returns
    -------
        None
    """

    # Locate the training split and bail out early if it is missing.
    train_dir = os.path.join(dataset_folder, 'train')
    if not os.path.isdir(train_dir):
        logging.warning("Train folder not found in the args.dataset_folder={}".format(dataset_folder))
        return

    # Enumerate the training images via an ImageFolder dataset.
    train_ds = datasets.ImageFolder(train_dir, transform=transforms.Compose([transforms.ToTensor()]))

    # Pull out just the image paths from the (path, label) entries.
    file_names = np.asarray([entry[0] for entry in train_ds.imgs])

    # Channel statistics: in-memory or streaming (worker-based) computation.
    mean, std = cms_inmem(file_names) if inmem else cms_online(file_names, workers)

    # Multi-label datasets ship a 'labels.csv'; pick the matching frequency routine.
    labels_csv = os.path.join(train_dir, 'labels.csv')
    if os.path.exists(labels_csv):
        class_frequencies_weights = _get_class_frequencies_weights_multilabel(labels_csv)
    else:
        class_frequencies_weights = _get_class_frequencies_weights(train_ds, workers)

    # Persist the analytics next to the dataset.
    df = pd.DataFrame([mean, std, class_frequencies_weights])
    df.index = ['mean[RGB]', 'std[RGB]', 'class_frequencies_weights[num_classes]']
    df.to_csv(os.path.join(dataset_folder, 'analytics.csv'), header=False)