Python torch.utils.data.dataset.Subset() Examples

The following are 10 code examples of torch.utils.data.dataset.Subset(), collected from open-source projects. Each example lists its source file, the project it comes from, and that project's license. Subset wraps an existing dataset and exposes only the samples at a given sequence of indices, which makes it useful for train/validation splits, for evaluating on a fixed slice of the training data, and for capping the number of samples during debugging. For related utilities, see the other functions and classes of torch.utils.data.dataset.
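
Before diving into the project examples, a minimal sketch of the basic pattern may help; the toy dataset and the index values below are made up purely for illustration:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import Subset

# A toy dataset of 10 (x, y) pairs; the values are arbitrary.
full_ds = TensorDataset(torch.arange(10, dtype=torch.float32).unsqueeze(1), torch.arange(10))

# Subset stores a reference to the full dataset plus a sequence of indices;
# subset[i] simply returns full_ds[indices[i]], so no data is copied.
val_ds = Subset(full_ds, [0, 2, 4])

assert len(val_ds) == 3
assert torch.equal(val_ds[1][0], full_ds[2][0])  # same underlying sample

# A Subset is itself a Dataset, so it can be passed straight to a DataLoader.
loader = DataLoader(val_ds, batch_size=2, shuffle=False)
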
Example #1
Source File: dataloaders.py    From ignite with BSD 3-Clause "New" or "Revised" License
def get_inference_dataloader(
    root_path: str,
    mode: str,
    transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    pin_memory: bool = True,
    limit_num_samples: Optional[int] = None,
) -> DataLoader:
    assert mode in ("train", "test"), "Mode should be 'train' or 'test'"

    get_dataset_fn = get_train_dataset if mode == "train" else get_val_dataset

    dataset = get_dataset_fn(root_path, return_meta=True)

    if limit_num_samples is not None:
        indices = np.random.permutation(len(dataset))[:limit_num_samples]
        dataset = Subset(dataset, indices)

    dataset = TransformedDataset(dataset, transform_fn=transforms)

    loader = DataLoader(
        dataset, shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False
    )
    return loader 
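
The limit_num_samples branch above is a common pattern: shuffle all dataset indices with np.random.permutation and keep the first k. Subset accepts any sequence of integers, including a NumPy array. A minimal sketch of the same idea, with a made-up full_ds in place of the project-specific dataset helpers:

import numpy as np
import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

full_ds = TensorDataset(torch.randn(1000, 3), torch.randint(0, 10, (1000,)))

limit_num_samples = 64
# Shuffle all indices, keep the first limit_num_samples, and wrap the result.
indices = np.random.permutation(len(full_ds))[:limit_num_samples]
small_ds = Subset(full_ds, indices)
assert len(small_ds) == limit_num_samples
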
Example #2
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def data_from_dataset(dataset, X_indexing=None, y_indexing=None):
    """Try to access X and y attribute from dataset.

    Also works when dataset is a subset.

    Parameters
    ----------
    dataset : skorch.dataset.Dataset or torch.utils.data.Subset
      The incoming dataset should be a ``skorch.dataset.Dataset`` or a
      ``torch.utils.data.Subset`` of a
      ``skorch.dataset.Dataset``.

    X_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the X data. If
      None, try to automatically determine how to index data.

    y_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the y data. If
      None, try to automatically determine how to index data.

    """
    X, y = _none, _none

    if isinstance(dataset, Subset):
        X, y = data_from_dataset(
            dataset.dataset, X_indexing=X_indexing, y_indexing=y_indexing)
        X = multi_indexing(X, dataset.indices, indexing=X_indexing)
        y = multi_indexing(y, dataset.indices, indexing=y_indexing)
    elif hasattr(dataset, 'X') and hasattr(dataset, 'y'):
        X, y = dataset.X, dataset.y

    if (X is _none) or (y is _none):
        raise AttributeError("Could not access X and y from dataset.")
    return X, y 
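
The helper above works by recursing into Subset's two public attributes: dataset (the wrapped dataset) and indices (the positions it exposes). The same unwrapping can be reproduced without skorch; the sketch below replaces multi_indexing with plain NumPy fancy indexing and uses a made-up ArrayDataset as a stand-in for skorch.dataset.Dataset:

import numpy as np
from torch.utils.data.dataset import Subset

class ArrayDataset:
    """Minimal dataset exposing X and y attributes (stand-in for skorch.dataset.Dataset)."""
    def __init__(self, X, y):
        self.X, self.y = X, y
    def __len__(self):
        return len(self.X)
    def __getitem__(self, i):
        return self.X[i], self.y[i]

X_full, y_full = np.arange(20).reshape(10, 2), np.arange(10)
ds = Subset(ArrayDataset(X_full, y_full), [1, 3, 5])

# The inner dataset holds the full arrays; the Subset holds the indices.
X = ds.dataset.X[ds.indices]
y = ds.dataset.y[ds.indices]
assert X.shape == (3, 2) and (y == np.array([1, 3, 5])).all()
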
Example #3
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def is_skorch_dataset(ds):
    """Checks if the supplied dataset is an instance of
    ``skorch.dataset.Dataset`` even when it is nested inside
    ``torch.utils.data.Subset``."""
    from skorch.dataset import Dataset
    if isinstance(ds, Subset):
        return is_skorch_dataset(ds.dataset)
    return isinstance(ds, Dataset)


Example #4
Source File: test_utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def subset(self, skorch_ds):
        from torch.utils.data.dataset import Subset
        return Subset(skorch_ds, [1, 3]) 
Example #5
Source File: test_utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def subset_subset(self, subset):
        from torch.utils.data.dataset import Subset
        return Subset(subset, [0])
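
Examples #4 and #5 nest one Subset inside another. Indices are always interpreted relative to the immediately wrapped dataset, so a nested Subset resolves in two hops rather than being flattened. A small sketch with a made-up base dataset:

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

base = TensorDataset(torch.arange(10))

inner = Subset(base, [1, 3])  # positions 1 and 3 of the base dataset
outer = Subset(inner, [0])    # position 0 of inner, i.e. base[1]

assert len(outer) == 1
assert outer[0][0].item() == 1
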

Example #6
Source File: create_dataloader.py    From Auto-PyTorch with Apache License 2.0
def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices):
    
        torch.manual_seed(pipeline_config["random_seed"])
        hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

        # prepare data
        drop_last = hyperparameter_config['batch_size'] < train_indices.shape[0]
        X, Y = to_dense(X), to_dense(Y)
        X, Y = torch.from_numpy(X).float(), torch.from_numpy(Y)

        train_dataset = TensorDataset(X, Y)
        train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=hyperparameter_config['batch_size'], 
            sampler=SubsetRandomSampler(train_indices),
            shuffle=False,
            drop_last=drop_last)
            
        valid_loader = None
        if valid_indices is not None:
            valid_loader = DataLoader(
                dataset=Subset(train_dataset, valid_indices),
                batch_size=hyperparameter_config['batch_size'],
                shuffle=False,
                drop_last=False)

        return {'train_loader': train_loader, 'valid_loader': valid_loader, 'batch_size': hyperparameter_config['batch_size']} 
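
Example #6 restricts a DataLoader to part of a dataset in two different ways: SubsetRandomSampler for the training indices (the loader keeps the full dataset and draws only those indices, in a new random order every epoch) and Subset for the validation indices (a fixed, re-indexed view iterated in order). A minimal side-by-side sketch with a toy dataset and a made-up index split:

import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler
from torch.utils.data.dataset import Subset

full_ds = TensorDataset(torch.randn(100, 4), torch.randint(0, 2, (100,)))
train_indices, valid_indices = list(range(80)), list(range(80, 100))

# Sampler approach: the loader sees the full dataset but only ever draws
# the given indices, reshuffled every epoch.
train_loader = DataLoader(full_ds, batch_size=16, sampler=SubsetRandomSampler(train_indices))

# Subset approach: a fixed view over the validation indices, iterated in order.
valid_loader = DataLoader(Subset(full_ds, valid_indices), batch_size=16, shuffle=False)
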
Example #7
Source File: test_scoring.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_net_input_is_scoring_input(
            self, net_cls, module_cls, scoring_cls, data,
    ):
        # Make sure that whatever data type is put in the network is
        # received at the scoring side as well. For the caching case
        # we only receive datasets.
        import skorch
        from skorch.dataset import CVSplit
        import torch.utils.data.dataset
        from torch.utils.data.dataset import Subset

        class MyTorchDataset(torch.utils.data.dataset.TensorDataset):
            def __init__(self, X, y):
                super().__init__(
                    skorch.utils.to_tensor(X.reshape(-1, 1), device='cpu'),
                    skorch.utils.to_tensor(y, device='cpu'))

        class MySkorchDataset(skorch.dataset.Dataset):
            pass

        rawsplit = lambda ds: (ds, ds)
        cvsplit = CVSplit(2, random_state=0)

        def split_ignore_y(ds, y):
            return rawsplit(ds)

        table = [
            # Test a split where type(input) == type(output) is guaranteed
            (data, split_ignore_y, np.ndarray, False),
            (data, split_ignore_y, skorch.dataset.Dataset, True),
            ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, False),
            ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, True),
            ((MySkorchDataset(*data), None), rawsplit, np.ndarray, False),
            ((MySkorchDataset(*data), None), rawsplit, MySkorchDataset, True),

            # Test a split that splits datasets using torch Subset
            (data, cvsplit, np.ndarray, False),
            (data, cvsplit, Subset, True),
            ((MyTorchDataset(*data), None), cvsplit, Subset, False),
            ((MyTorchDataset(*data), None), cvsplit, Subset, True),
            ((MySkorchDataset(*data), None), cvsplit, np.ndarray, False),
            ((MySkorchDataset(*data), None), cvsplit, Subset, True),
        ]

        for input_data, train_split, expected_type, caching in table:
            self.net_input_is_scoring_input(
                net_cls,
                module_cls,
                scoring_cls,
                input_data,
                train_split,
                expected_type,
                caching) 
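
The table above relies on the fact that skorch's CVSplit returns torch Subset objects when it splits a dataset (the expected_type column). PyTorch's own torch.utils.data.random_split behaves the same way, which is often the simplest way to obtain such Subsets; a small sketch with made-up sizes:

import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data.dataset import Subset

full_ds = TensorDataset(torch.randn(100, 4), torch.randint(0, 2, (100,)))

# random_split returns one Subset per requested length.
train_ds, valid_ds = random_split(full_ds, [80, 20])
assert isinstance(train_ds, Subset) and isinstance(valid_ds, Subset)
assert len(train_ds) == 80 and len(valid_ds) == 20
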
Example #8
Source File: test_dataloaders.py    From pytorch-lightning with Apache License 2.0
def test_batch_size_smaller_than_num_gpus(tmpdir):
    # we need at least 3 gpus for this test
    num_gpus = 3
    batch_size = 3

    class CurrentTestModel(EvalModelTemplate):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # batch norm doesn't work with batch size 1, we replace it
            self.c_d1_bn = torch.nn.ReLU()

        def training_step(self, *args, **kwargs):
            output = super().training_step(*args, **kwargs)
            loss = output['loss']
            # we make sure to add some metrics to the output dict,
            # this is essential for this test
            output['progress_bar'] = {'train_loss': loss}
            return output

        def train_dataloader(self):
            dataloader = super().train_dataloader()
            # construct a dataset with a size that is not divisible by num_gpus
            # therefore the last batch will have a size < num_gpus
            size = num_gpus * batch_size + (num_gpus - 1)
            dataset = Subset(dataloader.dataset, range(size))
            dataloader = DataLoader(
                dataset,
                batch_size=self.batch_size,
                drop_last=False,
            )
            return dataloader

    hparams = EvalModelTemplate.get_default_hparams()
    hparams['batch_size'] = batch_size
    model = CurrentTestModel(**hparams)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=0.1,
        limit_val_batches=0,
        gpus=num_gpus,
    )

    # we expect the reduction for the metrics also to happen on the last batch
    # where we will get fewer metrics than gpus
    result = trainer.fit(model)
    assert 1 == result 
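
Example #8 trims an existing dataloader's dataset to an exact length by wrapping it in a Subset over a contiguous range. The same trick caps any dataset at its first n samples without copying; a minimal sketch with a toy dataset:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import Subset

full_ds = TensorDataset(torch.randn(50, 4), torch.randint(0, 2, (50,)))

size = 11  # deliberately not divisible by the batch size
dataset = Subset(full_ds, range(size))  # the first 11 samples, no copy
loader = DataLoader(dataset, batch_size=3, drop_last=False)

assert len(dataset) == 11
assert len(loader) == 4  # 3 + 3 + 3 + 2: the last batch is smaller
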
Example #9
Source File: dataloaders.py    From ignite with BSD 3-Clause "New" or "Revised" License
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    with_sbd: Optional[str] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:

    train_ds = get_train_dataset(root_path)
    val_ds = get_val_dataset(root_path)

    if with_sbd is not None:
        sbd_train_ds = get_train_noval_sbdataset(with_sbd)
        train_ds = ConcatDataset([train_ds, sbd_train_ds])

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_ds = TransformedDataset(train_ds, transform_fn=train_transforms)
    val_ds = TransformedDataset(val_ds, transform_fn=val_transforms)
    train_eval_ds = TransformedDataset(train_eval_ds, transform_fn=val_transforms)

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader 
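
Note that the loader above calls np.random.seed immediately before each permutation (seeded with the sample limit or with len(val_ds)), so the same indices are drawn on every run, at the cost of reseeding NumPy's global state. A local generator gives the same reproducibility without that side effect; a sketch of the alternative, with a hypothetical helper name:

import numpy as np
from torch.utils.data.dataset import Subset

def limited_subset(dataset, limit, seed):
    # Hypothetical helper: draw a reproducible random subset without
    # touching the global np.random state.
    rng = np.random.default_rng(seed)
    indices = rng.permutation(len(dataset))[:limit]
    return Subset(dataset, indices)
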
Example #10
Source File: dataloaders.py    From ignite with BSD 3-Clause "New" or "Revised" License
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:

    train_ds = ImageNet(
        root_path, split="train", transform=lambda sample: train_transforms(image=sample)["image"], loader=opencv_loader
    )
    val_ds = ImageNet(
        root_path, split="val", transform=lambda sample: val_transforms(image=sample)["image"], loader=opencv_loader
    )

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader