Python torch.utils.data.dataset.Subset() Examples
The following are 10 code examples of torch.utils.data.dataset.Subset(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.utils.data.dataset, or try the search function.
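Before the project examples, here is a minimal, self-contained sketch of what Subset does: it wraps an existing dataset together with a sequence of indices and exposes only those items, keeping the wrapped dataset and the index sequence available as the .dataset and .indices attributes. All names below are illustrative and not taken from any of the projects that follow.

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

# a toy dataset with 10 samples
full_ds = TensorDataset(torch.arange(10).float().unsqueeze(1), torch.arange(10))

# keep only the samples at positions 2, 5 and 7
sub_ds = Subset(full_ds, [2, 5, 7])

print(len(sub_ds))      # 3
print(sub_ds[0])        # same item as full_ds[2]
print(sub_ds.dataset)   # the wrapped dataset
print(sub_ds.indices)   # [2, 5, 7]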
Example #1
Source File: dataloaders.py From ignite with BSD 3-Clause "New" or "Revised" License
def get_inference_dataloader(
    root_path: str,
    mode: str,
    transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    pin_memory: bool = True,
    limit_num_samples: Optional[int] = None,
) -> DataLoader:
    assert mode in ("train", "test"), "Mode should be 'train' or 'test'"

    get_dataset_fn = get_train_dataset if mode == "train" else get_val_dataset

    dataset = get_dataset_fn(root_path, return_meta=True)

    if limit_num_samples is not None:
        indices = np.random.permutation(len(dataset))[:limit_num_samples]
        dataset = Subset(dataset, indices)

    dataset = TransformedDataset(dataset, transform_fn=transforms)

    loader = DataLoader(
        dataset,
        shuffle=False,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        drop_last=False,
    )
    return loader
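The part of this function relevant to Subset is the limit_num_samples branch: a random permutation of all indices is truncated and used to wrap the dataset. Since get_train_dataset, get_val_dataset and TransformedDataset are project-specific, the following stand-alone sketch reproduces only that subsampling step with a toy TensorDataset (all names and sizes are made up):

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import Subset

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))

limit_num_samples = 16  # hypothetical limit
if limit_num_samples is not None:
    # pick a random selection of indices, then wrap the dataset
    indices = np.random.permutation(len(dataset))[:limit_num_samples]
    dataset = Subset(dataset, indices)

loader = DataLoader(dataset, batch_size=4, shuffle=False)
print(len(dataset))  # 16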
Example #2
Source File: utils.py From skorch with BSD 3-Clause "New" or "Revised" License
def data_from_dataset(dataset, X_indexing=None, y_indexing=None):
    """Try to access X and y attribute from dataset.

    Also works when dataset is a subset.

    Parameters
    ----------
    dataset : skorch.dataset.Dataset or torch.utils.data.Subset
      The incoming dataset should be a ``skorch.dataset.Dataset`` or a
      ``torch.utils.data.Subset`` of a ``skorch.dataset.Dataset``.

    X_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the X data. If
      None, try to automatically determine how to index data.

    y_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the y data. If
      None, try to automatically determine how to index data.

    """
    X, y = _none, _none

    if isinstance(dataset, Subset):
        X, y = data_from_dataset(
            dataset.dataset, X_indexing=X_indexing, y_indexing=y_indexing)
        X = multi_indexing(X, dataset.indices, indexing=X_indexing)
        y = multi_indexing(y, dataset.indices, indexing=y_indexing)
    elif hasattr(dataset, 'X') and hasattr(dataset, 'y'):
        X, y = dataset.X, dataset.y

    if (X is _none) or (y is _none):
        raise AttributeError("Could not access X and y from dataset.")

    return X, y
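The helper above depends on two public attributes of Subset: .dataset, the wrapped dataset, and .indices, the selected positions; it recurses through nested Subsets before applying skorch's indexing helpers. A rough, hypothetical sketch of the same unwrapping idea without the skorch-specific pieces (unwrap_subset is not a skorch function):

from torch.utils.data.dataset import Subset

def unwrap_subset(ds):
    """Follow nested Subsets down to the underlying dataset,
    composing the index sequences along the way."""
    indices = None
    while isinstance(ds, Subset):
        if indices is None:
            indices = list(ds.indices)
        else:
            indices = [ds.indices[i] for i in indices]
        ds = ds.dataset
    return ds, indices

For example, unwrap_subset(Subset(Subset(base, [1, 3]), [0])) would return (base, [1]), i.e. the single underlying item the nested subset exposes.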
Example #3
Source File: utils.py From skorch with BSD 3-Clause "New" or "Revised" License
def is_skorch_dataset(ds):
    """Checks if the supplied dataset is an instance of
    ``skorch.dataset.Dataset`` even when it is nested inside
    ``torch.utils.data.Subset``."""
    from skorch.dataset import Dataset
    if isinstance(ds, Subset):
        return is_skorch_dataset(ds.dataset)
    return isinstance(ds, Dataset)


# pylint: disable=unused-argument
Example #4
Source File: test_utils.py From skorch with BSD 3-Clause "New" or "Revised" License
def subset(self, skorch_ds):
    from torch.utils.data.dataset import Subset
    return Subset(skorch_ds, [1, 3])
Example #5
Source File: test_utils.py From skorch with BSD 3-Clause "New" or "Revised" License
def subset_subset(self, subset):
    from torch.utils.data.dataset import Subset
    return Subset(subset, [0])


# pylint: disable=missing-docstring
Example #6
Source File: create_dataloader.py From Auto-PyTorch with Apache License 2.0
def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices):
    torch.manual_seed(pipeline_config["random_seed"])
    hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

    # prepare data
    drop_last = hyperparameter_config['batch_size'] < train_indices.shape[0]
    X, Y = to_dense(X), to_dense(Y)
    X, Y = torch.from_numpy(X).float(), torch.from_numpy(Y)

    train_dataset = TensorDataset(X, Y)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=hyperparameter_config['batch_size'],
        sampler=SubsetRandomSampler(train_indices),
        shuffle=False,
        drop_last=drop_last)

    valid_loader = None
    if valid_indices is not None:
        valid_loader = DataLoader(
            dataset=Subset(train_dataset, valid_indices),
            batch_size=hyperparameter_config['batch_size'],
            shuffle=False,
            drop_last=False)

    return {'train_loader': train_loader,
            'valid_loader': valid_loader,
            'batch_size': hyperparameter_config['batch_size']}
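The detail worth noting here is that the training loader draws from the full TensorDataset through a SubsetRandomSampler, while the validation loader wraps the very same TensorDataset in a Subset so it only ever sees the validation indices. A stripped-down sketch of that split, with made-up data, sizes and indices:

import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler
from torch.utils.data.dataset import Subset

X = torch.randn(20, 4)
Y = torch.randint(0, 2, (20,))
dataset = TensorDataset(X, Y)

train_indices = list(range(0, 15))
valid_indices = list(range(15, 20))

# training: shuffle within the training indices via a sampler
train_loader = DataLoader(dataset, batch_size=5,
                          sampler=SubsetRandomSampler(train_indices))

# validation: expose only the validation indices via Subset
valid_loader = DataLoader(Subset(dataset, valid_indices),
                          batch_size=5, shuffle=False)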
Example #7
Source File: test_scoring.py From skorch with BSD 3-Clause "New" or "Revised" License
def test_net_input_is_scoring_input(
        self, net_cls, module_cls, scoring_cls, data,
):
    # Make sure that whatever data type is put in the network is
    # received at the scoring side as well. For the caching case
    # we only receive datasets.
    import skorch
    from skorch.dataset import CVSplit
    import torch.utils.data.dataset
    from torch.utils.data.dataset import Subset

    class MyTorchDataset(torch.utils.data.dataset.TensorDataset):
        def __init__(self, X, y):
            super().__init__(
                skorch.utils.to_tensor(X.reshape(-1, 1), device='cpu'),
                skorch.utils.to_tensor(y, device='cpu'))

    class MySkorchDataset(skorch.dataset.Dataset):
        pass

    rawsplit = lambda ds: (ds, ds)
    cvsplit = CVSplit(2, random_state=0)

    def split_ignore_y(ds, y):
        return rawsplit(ds)

    table = [
        # Test a split where type(input) == type(output) is guaranteed
        (data, split_ignore_y, np.ndarray, False),
        (data, split_ignore_y, skorch.dataset.Dataset, True),
        ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, False),
        ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, True),
        ((MySkorchDataset(*data), None), rawsplit, np.ndarray, False),
        ((MySkorchDataset(*data), None), rawsplit, MySkorchDataset, True),

        # Test a split that splits datasets using torch Subset
        (data, cvsplit, np.ndarray, False),
        (data, cvsplit, Subset, True),
        ((MyTorchDataset(*data), None), cvsplit, Subset, False),
        ((MyTorchDataset(*data), None), cvsplit, Subset, True),
        ((MySkorchDataset(*data), None), cvsplit, np.ndarray, False),
        ((MySkorchDataset(*data), None), cvsplit, Subset, True),
    ]

    for input_data, train_split, expected_type, caching in table:
        self.net_input_is_scoring_input(
            net_cls,
            module_cls,
            scoring_cls,
            input_data,
            train_split,
            expected_type,
            caching)
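The second half of the table encodes the expectation that skorch's CVSplit hands torch Subset objects to the scoring callbacks once it splits a dataset input. Independently of skorch, the underlying idea of cutting one dataset into train and validation Subsets by shuffled indices can be sketched like this (split_dataset and the 80/20 ratio are made up for illustration):

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

def split_dataset(ds, valid_fraction=0.2, seed=0):
    # shuffle indices reproducibly, then cut them into two Subsets
    g = torch.Generator().manual_seed(seed)
    perm = torch.randperm(len(ds), generator=g).tolist()
    n_valid = int(len(ds) * valid_fraction)
    return Subset(ds, perm[n_valid:]), Subset(ds, perm[:n_valid])

ds = TensorDataset(torch.randn(10, 2), torch.zeros(10))
train_ds, valid_ds = split_dataset(ds)
print(len(train_ds), len(valid_ds))  # 8 2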
Example #8
Source File: test_dataloaders.py From pytorch-lightning with Apache License 2.0
def test_batch_size_smaller_than_num_gpus(tmpdir):
    # we need at least 3 gpus for this test
    num_gpus = 3
    batch_size = 3

    class CurrentTestModel(EvalModelTemplate):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # batch norm doesn't work with batch size 1, we replace it
            self.c_d1_bn = torch.nn.ReLU()

        def training_step(self, *args, **kwargs):
            output = super().training_step(*args, **kwargs)
            loss = output['loss']
            # we make sure to add some metrics to the output dict,
            # this is essential for this test
            output['progress_bar'] = {'train_loss': loss}
            return output

        def train_dataloader(self):
            dataloader = super().train_dataloader()
            # construct a dataset with a size that is not divisible by num_gpus
            # therefore the last batch will have a size < num_gpus
            size = num_gpus * batch_size + (num_gpus - 1)
            dataset = Subset(dataloader.dataset, range(size))
            dataloader = DataLoader(
                dataset,
                batch_size=self.batch_size,
                drop_last=False,
            )
            return dataloader

    hparams = EvalModelTemplate.get_default_hparams()
    hparams['batch_size'] = batch_size
    model = CurrentTestModel(**hparams)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=0.1,
        limit_val_batches=0,
        gpus=num_gpus,
    )

    # we expect the reduction for the metrics also to happen on the last batch
    # where we will get fewer metrics than gpus
    result = trainer.fit(model)
    assert 1 == result
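Note that Subset is passed a range object here rather than a list; any indexable sequence works. A toy sketch of the size arithmetic the test relies on, outside of the Lightning machinery (numbers chosen only to mirror the test):

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import Subset

num_gpus, batch_size = 3, 3
full_ds = TensorDataset(torch.randn(100, 2))

# size not divisible by num_gpus, so the last batch is smaller
size = num_gpus * batch_size + (num_gpus - 1)   # 11
dataset = Subset(full_ds, range(size))

loader = DataLoader(dataset, batch_size=batch_size, drop_last=False)
print([len(batch[0]) for batch in loader])  # [3, 3, 3, 2]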
Example #9
Source File: dataloaders.py From ignite with BSD 3-Clause "New" or "Revised" License
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    with_sbd: Optional[str] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    train_ds = get_train_dataset(root_path)
    val_ds = get_val_dataset(root_path)

    if with_sbd is not None:
        sbd_train_ds = get_train_noval_sbdataset(with_sbd)
        train_ds = ConcatDataset([train_ds, sbd_train_ds])

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_ds = TransformedDataset(train_ds, transform_fn=train_transforms)
    val_ds = TransformedDataset(val_ds, transform_fn=val_transforms)
    train_eval_ds = TransformedDataset(train_eval_ds, transform_fn=val_transforms)

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader
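Two details in this function are easy to miss: np.random.seed is called immediately before each permutation, so the drawn Subset is deterministic (presumably so that every distributed process builds the same subset), and the training set is additionally subsampled to the validation-set size to get a cheap train-eval split. A hypothetical helper distilling the reproducibility pattern (reproducible_subset is not part of ignite):

import numpy as np
from torch.utils.data.dataset import Subset

def reproducible_subset(ds, num_samples, seed):
    # same seed -> same permutation -> same Subset on every call/process
    np.random.seed(seed)
    indices = np.random.permutation(len(ds))[:num_samples]
    return Subset(ds, indices)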
Example #10
Source File: dataloaders.py From ignite with BSD 3-Clause "New" or "Revised" License
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    train_ds = ImageNet(
        root_path,
        split="train",
        transform=lambda sample: train_transforms(image=sample)["image"],
        loader=opencv_loader,
    )
    val_ds = ImageNet(
        root_path,
        split="val",
        transform=lambda sample: val_transforms(image=sample)["image"],
        loader=opencv_loader,
    )

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader