Python torch.utils.data.BatchSampler() Examples
The following are 16 code examples of torch.utils.data.BatchSampler(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the torch.utils.data module.
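For orientation before the examples: BatchSampler wraps another sampler and yields lists of indices rather than individual indices. A minimal sketch of that behavior, independent of any project below:

from torch.utils.data import BatchSampler, SequentialSampler

# BatchSampler yields index lists, not data. Wrapping a sequential
# sampler over 10 items with batch_size=3 keeps the final short
# batch because drop_last=False.
batches = list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False))
print(batches)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]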
Example #1
Source File: test_deterministic.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_engine_with_dataloader_no_auto_batching():
    # tests https://github.com/pytorch/ignite/issues/941
    from torch.utils.data import DataLoader, BatchSampler, RandomSampler

    data = torch.rand(64, 4, 10)
    data_loader = DataLoader(
        data, batch_size=None, sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True)
    )

    counter = [0]

    def foo(e, b):
        print("{}-{}: {}".format(e.state.epoch, e.state.iteration, b))
        counter[0] += 1

    engine = DeterministicEngine(foo)
    engine.run(data_loader, epoch_length=10, max_epochs=5)

    assert counter[0] == 50
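For context on the pattern above: passing batch_size=None disables the DataLoader's automatic batching, so each index list yielded by the BatchSampler is handed to the dataset in a single lookup. A minimal standalone sketch of that behavior (not part of the ignite test):

import torch
from torch.utils.data import DataLoader, BatchSampler, RandomSampler

data = torch.rand(64, 4, 10)
loader = DataLoader(
    data,
    batch_size=None,  # disable auto-batching
    sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True),
)
batch = next(iter(loader))
print(batch.shape)  # torch.Size([8, 4, 10]): one whole index list fetched at once

With epoch_length=10 and max_epochs=5, the engine processes 50 such batches, which is what the final assertion checks.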
Example #2
Source File: probe.py From atari-representation-learning with MIT License
def generate_batch(self, episodes, episode_labels):
    total_steps = sum([len(e) for e in episodes])
    assert total_steps > self.batch_size
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        episode_labels_batch = [episode_labels[x] for x in indices]
        xs, labels = [], appendabledict()
        for ep_ind, episode in enumerate(episodes_batch):
            # Get one sample from this episode
            t = np.random.randint(len(episode))
            xs.append(episode[t])
            labels.append_update(episode_labels_batch[ep_ind][t])
        yield torch.stack(xs).float().to(self.device) / 255., labels
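The RandomSampler here draws indices with replacement, so num_samples controls the total number of draws and hence the number of batches per pass. A small sketch of that arithmetic (the sizes are illustrative, not taken from the project):

from torch.utils.data import BatchSampler, RandomSampler

# 12 draws with replacement from 5 items, batched by 4 with
# drop_last=True, gives exactly 12 // 4 = 3 batches.
sampler = BatchSampler(RandomSampler(range(5), replacement=True, num_samples=12),
                       batch_size=4, drop_last=True)
print(len(list(sampler)))  # 3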
Example #3
Source File: stdim.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            x_tprev.append(episode[t - 1])
            ts.append([t])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tprev).float().to(self.device) / 255.
Example #4
Source File: no_action_feedforward_predictor.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tn = [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t = np.random.randint(0, len(episode) - self.pred_offset)
            t_n = t + self.pred_offset
            x_t.append(episode[t])
            x_tn.append(episode[t_n])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tn).float().to(self.device) / 255.
Example #5
Source File: vae.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t = []
        for episode in episodes_batch:
            # Get one sample from this episode
            t = np.random.randint(0, len(episode))
            x_t.append(episode[t])
        yield torch.stack(x_t).float().to(self.device) / 255.
Example #6
Source File: test_engine.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_engine_with_dataloader_no_auto_batching():
    # tests https://github.com/pytorch/ignite/issues/941
    from torch.utils.data import DataLoader, BatchSampler, RandomSampler

    data = torch.rand(64, 4, 10)
    data_loader = DataLoader(
        data, batch_size=None, sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True)
    )

    counter = [0]

    def foo(e, b):
        print("{}-{}: {}".format(e.state.epoch, e.state.iteration, b))
        counter[0] += 1

    engine = Engine(foo)
    engine.run(data_loader, epoch_length=10, max_epochs=5)

    assert counter[0] == 50
Example #7
Source File: global_infonce_stdim.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            # Apply the same transform to x_{t-1} and x_{t_hat}
            # https://github.com/pytorch/vision/issues/9#issuecomment-383110707
            # Use numpy's random seed because Cutout uses np
            # seed = random.randint(0, 2 ** 32)
            # np.random.seed(seed)
            x_tprev.append(episode[t - 1])
            # np.random.seed(seed)
            # x_that.append(episode[t_hat])
            ts.append([t])
            # thats.append([t_hat])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tprev).float().to(self.device) / 255.
Example #8
Source File: global_local_infonce.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            # Apply the same transform to x_{t-1} and x_{t_hat}
            # https://github.com/pytorch/vision/issues/9#issuecomment-383110707
            # Use numpy's random seed because Cutout uses np
            # seed = random.randint(0, 2 ** 32)
            # np.random.seed(seed)
            x_tprev.append(episode[t - 1])
            # np.random.seed(seed)
            # x_that.append(episode[t_hat])
            ts.append([t])
            # thats.append([t_hat])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tprev).float().to(self.device) / 255.
Example #9
Source File: temporal_dim.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            # Apply the same transform to x_{t-1} and x_{t_hat}
            # https://github.com/pytorch/vision/issues/9#issuecomment-383110707
            # Use numpy's random seed because Cutout uses np
            # seed = random.randint(0, 2 ** 32)
            # np.random.seed(seed)
            x_tprev.append(episode[t - 1])
            # np.random.seed(seed)
            x_that.append(episode[t_hat])
            ts.append([t])
            thats.append([t_hat])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tprev).float().to(self.device) / 255., \
              torch.stack(x_that).float().to(self.device) / 255., \
              torch.Tensor(ts).to(self.device), \
              torch.Tensor(thats).to(self.device)
Example #10
Source File: cpc.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    episodes = [episode for episode in episodes if len(episode) >= self.sequence_length]
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True,
                                         num_samples=len(episodes) * self.sequence_length),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        sequences = []
        for episode in episodes_batch:
            start_index = np.random.randint(0, len(episode) - self.sequence_length + 1)
            seq = episode[start_index:start_index + self.sequence_length]
            sequences.append(torch.stack(seq))
        yield torch.stack(sequences).float()
Example #11
Source File: dim_baseline.py From atari-representation-learning with MIT License
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            # Apply the same transform to x_{t-1} and x_{t_hat}
            # https://github.com/pytorch/vision/issues/9#issuecomment-383110707
            # Use numpy's random seed because Cutout uses np
            # seed = random.randint(0, 2 ** 32)
            # np.random.seed(seed)
            x_tprev.append(episode[t - 1])
            # np.random.seed(seed)
            # x_that.append(episode[t_hat])
            ts.append([t])
            # thats.append([t_hat])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tprev).float().to(self.device) / 255.
Example #12
Source File: data.py From recoder with MIT License
def __init__(self, dataset, batch_size, negative_sampling=False,
             num_sampling_users=0, num_workers=0, collate_fn=None):
    self.dataset = dataset  # type: RecommendationDataset
    self.num_sampling_users = num_sampling_users
    self.num_workers = num_workers
    self.batch_size = batch_size
    self.negative_sampling = negative_sampling

    if self.num_sampling_users == 0:
        self.num_sampling_users = batch_size

    assert self.num_sampling_users >= batch_size, \
        'num_sampling_users should be at least equal to the batch_size'

    self.batch_collator = BatchCollator(batch_size=self.batch_size,
                                        negative_sampling=self.negative_sampling)

    # Wrapping a BatchSampler within a BatchSampler
    # in order to fetch the whole mini-batch at once
    # from the dataset instead of fetching each sample on its own
    batch_sampler = BatchSampler(BatchSampler(RandomSampler(dataset),
                                              batch_size=self.num_sampling_users,
                                              drop_last=False),
                                 batch_size=1, drop_last=False)

    if collate_fn is None:
        self._collate_fn = self.batch_collator.collate
        self._use_default_data_generator = True
    else:
        self._collate_fn = collate_fn
        self._use_default_data_generator = False

    self._dataloader = DataLoader(dataset, batch_sampler=batch_sampler,
                                  num_workers=num_workers,
                                  collate_fn=self._collate_fn)
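The nesting above is worth unpacking: the outer BatchSampler has batch_size=1, so each element the DataLoader sees is a single inner index list, and the whole mini-batch reaches the dataset's __getitem__ in one call. A toy sketch of the index structure this produces (sizes are illustrative; a sequential sampler is used so the output is deterministic):

from torch.utils.data import BatchSampler, SequentialSampler

inner = BatchSampler(SequentialSampler(range(10)), batch_size=4, drop_last=False)
outer = BatchSampler(inner, batch_size=1, drop_last=False)
print(list(outer))  # [[[0, 1, 2, 3]], [[4, 5, 6, 7]], [[8, 9]]]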
Example #13
Source File: test_deterministic.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_reproducible_batch_sampler_wrong_input():
    with pytest.raises(TypeError, match=r"Argument batch_sampler should be torch.utils.data.sampler.BatchSampler"):
        ReproducibleBatchSampler("abc")
Example #14
Source File: dcca.py From mvlearn with Apache License 2.0
def _get_outputs(self, x1, x2):
    """
    Private function to get the transformed data and the
    corresponding loss for the given inputs.

    Parameters
    ----------
    x1 : torch.tensor
        Input view 1 data.
    x2 : torch.tensor
        Input view 2 data.

    Returns
    -------
    losses : list
        List of losses for each batch taken from the input data.
    outputs : list of tensors
        outputs[i] is the output of the deep models for view i.
    """
    with torch.no_grad():
        self.model_.eval()
        data_size = x1.size(0)
        batch_idxs = list(BatchSampler(SequentialSampler(range(data_size)),
                                       batch_size=self.batch_size_,
                                       drop_last=False))
        losses = []
        outputs1 = []
        outputs2 = []
        for batch_idx in batch_idxs:
            batch_x1 = x1[batch_idx, :]
            batch_x2 = x2[batch_idx, :]
            o1, o2 = self.model_(batch_x1, batch_x2)
            outputs1.append(o1)
            outputs2.append(o2)
            loss = self.loss_(o1, o2)
            losses.append(loss.item())

    outputs = [torch.cat(outputs1, dim=0).cpu().numpy(),
               torch.cat(outputs2, dim=0).cpu().numpy()]
    return losses, outputs
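Note that BatchSampler is used here on its own, without a DataLoader, purely to split tensor rows into mini-batches for evaluation. A minimal sketch of that standalone pattern (tensor sizes are illustrative):

import torch
from torch.utils.data import BatchSampler, SequentialSampler

x = torch.randn(10, 3)
for batch_idx in BatchSampler(SequentialSampler(range(10)), batch_size=4, drop_last=False):
    chunk = x[batch_idx, :]  # fancy-index one block of rows
    print(chunk.shape)       # [4, 3], [4, 3], then [2, 3]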
Example #15
Source File: dataloader.py From PySyft with Apache License 2.0
def __init__(
    self,
    federated_dataset,
    batch_size=8,
    shuffle=False,
    num_iterators=1,
    drop_last=False,
    collate_fn=default_collate,
    iter_per_worker=False,
    **kwargs,
):
    if len(kwargs) > 0:
        options = ", ".join([f"{k}: {v}" for k, v in kwargs.items()])
        logging.warning(f"The following options are not supported: {options}")

    try:
        self.workers = federated_dataset.workers
    except AttributeError:
        raise Exception(
            "Your dataset is not a FederatedDataset, please use "
            "torch.utils.data.DataLoader instead."
        )

    self.federated_dataset = federated_dataset
    self.batch_size = batch_size
    self.drop_last = drop_last
    self.collate_fn = collate_fn
    self.iter_class = _DataLoaderOneWorkerIter if iter_per_worker else _DataLoaderIter

    # Build a batch sampler per worker
    self.batch_samplers = {}
    for worker in self.workers:
        data_range = range(len(federated_dataset[worker]))
        if shuffle:
            sampler = RandomSampler(data_range)
        else:
            sampler = SequentialSampler(data_range)
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)
        self.batch_samplers[worker] = batch_sampler

    if iter_per_worker:
        self.num_iterators = len(self.workers)
    else:
        # You can't have more iterators than n - 1 workers, because you always
        # need a worker idle in the worker switch process made by iterators
        if len(self.workers) == 1:
            self.num_iterators = 1
        else:
            self.num_iterators = min(num_iterators, len(self.workers) - 1)
Example #16
Source File: dataloader.py From mt-dnn with MIT License
def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
             batch_sampler=None, num_workers=0, collate_fn=default_collate,
             pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None):
    self.dataset = dataset
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.collate_fn = collate_fn
    self.pin_memory = pin_memory
    self.drop_last = drop_last
    self.timeout = timeout
    self.worker_init_fn = worker_init_fn

    if timeout < 0:
        raise ValueError('timeout option should be non-negative')

    if batch_sampler is not None:
        if batch_size > 1 or shuffle or sampler is not None or drop_last:
            raise ValueError('batch_sampler option is mutually exclusive '
                             'with batch_size, shuffle, sampler, and '
                             'drop_last')
        self.batch_size = None
        self.drop_last = None

    if sampler is not None and shuffle:
        raise ValueError('sampler option is mutually exclusive with '
                         'shuffle')

    if self.num_workers < 0:
        raise ValueError('num_workers option cannot be negative; '
                         'use num_workers=0 to disable multiprocessing.')

    if batch_sampler is None:
        if sampler is None:
            if shuffle:
                sampler = RandomSampler(dataset)
            else:
                sampler = SequentialSampler(dataset)
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)

    self.sampler = sampler
    self.batch_sampler = batch_sampler
    self.__initialized = True
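The construction at the end mirrors what stock PyTorch does: when no batch_sampler is given, DataLoader builds one from the sampler, batch_size, and drop_last. A small sketch of the resulting equivalence, assuming a simple TensorDataset:

import torch
from torch.utils.data import DataLoader, BatchSampler, RandomSampler, TensorDataset

ds = TensorDataset(torch.arange(8).float())
# The first loader builds a BatchSampler internally, exactly as in
# the __init__ above; the second passes an equivalent one explicitly.
a = DataLoader(ds, batch_size=4, shuffle=True)
b = DataLoader(ds, batch_sampler=BatchSampler(RandomSampler(ds), batch_size=4, drop_last=False))
for (xa,), (xb,) in zip(a, b):
    print(xa.shape, xb.shape)  # torch.Size([4]) each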