Python torch.utils.data.sampler.SubsetRandomSampler() Examples
The following are 30 code examples of torch.utils.data.sampler.SubsetRandomSampler(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out the other functions and classes available in the torch.utils.data.sampler module.
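
As background for the examples below, here is a minimal, self-contained sketch of the usual pattern: shuffle the dataset indices, split them, and hand each SubsetRandomSampler to its own DataLoader. The toy dataset, batch size, and 80/20 split are illustrative assumptions, not taken from any of the projects listed here.

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

# Toy dataset; any torch.utils.data.Dataset works the same way.
dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))

# Shuffle the indices once, then split them 80/20 into train/validation.
indices = np.random.permutation(len(dataset))
split = int(0.8 * len(dataset))
train_sampler = SubsetRandomSampler(indices[:split])
valid_sampler = SubsetRandomSampler(indices[split:])

# A sampler is mutually exclusive with shuffle=True, so shuffle is left at its default.
train_loader = DataLoader(dataset, batch_size=16, sampler=train_sampler)
valid_loader = DataLoader(dataset, batch_size=16, sampler=valid_sampler)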
Example #1
Source File: rollout.py From midlevel-reps with MIT License | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        f"PPO requires the number processes ({num_processes}) "
        f"* number of steps ({num_steps}) = {num_processes * num_steps} "
        f"to be greater than or equal to the number of PPO mini batches ({num_mini_batch}).")
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        observations_batch = self.observations[:-1].view(-1, *self.observations.size()[2:])[indices]
        states_batch = self.states[:-1].view(-1, self.states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield observations_batch, states_batch, actions_batch, \
            return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #2
Source File: base_dataloader.py From pytorch_segmentation with MIT License | 6 votes |
def _split_sampler(self, split):
    if split == 0.0:
        return None, None

    self.shuffle = False

    split_indx = int(self.nbr_examples * split)
    np.random.seed(0)

    indxs = np.arange(self.nbr_examples)
    np.random.shuffle(indxs)
    train_indxs = indxs[split_indx:]
    val_indxs = indxs[:split_indx]
    self.nbr_examples = len(train_indxs)

    train_sampler = SubsetRandomSampler(train_indxs)
    val_sampler = SubsetRandomSampler(val_indxs)
    return train_sampler, val_sampler
Example #3
Source File: storage.py From Actor-Critic-Based-Resource-Allocation-for-Multimodal-Optical-Networks with GNU General Public License v3.0 | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        indices = torch.LongTensor(indices)

        if advantages.is_cuda:
            indices = indices.cuda()

        observations_batch = self.observations[:-1].view(-1, *self.observations.size()[2:])[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield observations_batch, actions_batch, \
            return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #4
Source File: base_data_loader.py From ModelFeast with MIT License | 6 votes |
def _split_sampler(self, split):
    if split == 0.0:
        return None, None

    idx_full = np.arange(self.n_samples)

    np.random.seed(0)
    np.random.shuffle(idx_full)

    len_valid = int(self.n_samples * split)

    valid_idx = idx_full[0:len_valid]
    train_idx = np.delete(idx_full, np.arange(0, len_valid))

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # turn off shuffle option which is mutually exclusive with sampler
    self.shuffle = False
    self.n_samples = len(train_idx)

    return train_sampler, valid_sampler
Example #5
Source File: rollout_storage.py From pytorch-pommerman-rl with MIT License | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "* number of steps ({}) = {} "
        "to be greater than or equal to the number of PPO mini batches ({})."
        "".format(num_processes, num_steps, num_processes * num_steps, num_mini_batch))
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #6
Source File: data.py From PyTorch-BayesianCNN with MIT License | 6 votes |
def getDataloader(trainset, testset, valid_size, batch_size, num_workers):
    num_train = len(trainset)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                               sampler=train_sampler, num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                               sampler=valid_sampler, num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                              num_workers=num_workers)

    return train_loader, valid_loader, test_loader
Example #7
Source File: train.py From touchdown with Creative Commons Attribution 4.0 International | 6 votes |
def split_dataset(dataset, split_ratio, batch_size, shuffle_split=False):
    # creating data indices for training and tuning splits
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(dataset_size * split_ratio)
    if shuffle_split:
        np.random.seed(args.seed)
        np.random.shuffle(indices)
    train_indices = indices[split:]
    tune_indices = indices[:split]

    train_sampler = SubsetRandomSampler(train_indices)
    tune_sampler = SubsetRandomSampler(tune_indices)
    train_iterator = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    tune_iterator = DataLoader(dataset, batch_size=batch_size, sampler=tune_sampler)
    return train_iterator, tune_iterator
Example #8
Source File: rollout.py From midlevel-reps with MIT License | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        f"PPO requires the number processes ({num_processes}) "
        f"* number of steps ({num_steps}) = {num_processes * num_steps} "
        f"to be greater than or equal to the number of PPO mini batches ({num_mini_batch}).")
    mini_batch_size = batch_size // num_mini_batch
    observations_batch = {}
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        for k, sensor_ob in self.observations.items():
            observations_batch[k] = sensor_ob[:-1].view(-1, *sensor_ob.size()[2:])[indices]
        states_batch = self.states[:-1].view(-1, self.states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield observations_batch, states_batch, actions_batch, \
            return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #9
Source File: storage.py From gym-miniworld with Apache License 2.0 | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "* number of steps ({}) = {} "
        "to be greater than or equal to the number of PPO mini batches ({})."
        "".format(num_processes, num_steps, num_processes * num_steps, num_mini_batch))
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #10
Source File: storage.py From dal with MIT License | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "* number of steps ({}) = {} "
        "to be greater than or equal to the number of PPO mini batches ({})."
        "".format(num_processes, num_steps, num_processes * num_steps, num_mini_batch))
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            value_preds_batch, return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #11
Source File: storage.py From marl_transfer with MIT License | 6 votes |
def feed_forward_generator(self, advantages, num_mini_batch, sampler=None):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "* number of steps ({}) = {} "
        "to be greater than or equal to the number of PPO mini batches ({})."
        "".format(num_processes, num_steps, num_processes * num_steps, num_mini_batch))
    mini_batch_size = batch_size // num_mini_batch
    if sampler is None:
        sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, value_preds_batch, return_batch, \
            masks_batch, old_action_log_probs_batch, adv_targ
Example #12
Source File: memory.py From ppo-pytorch with MIT License | 6 votes |
def sample(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    # Make sure we have at least enough for a bunch of batches of size 1.
    assert batch_size >= num_mini_batch
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        observations_batch = self.observations[:-1].view(-1, *self.observations.size()[2:])[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv = advantages.view(-1, 1)[indices]

        yield observations_batch, actions_batch, \
            return_batch, masks_batch, old_action_log_probs_batch, adv
Example #13
Source File: DQN_CartPole-v0.py From Deep-reinforcement-learning-with-pytorch with MIT License | 6 votes |
def update(self):
    if self.memory_count >= self.capacity:
        state = torch.tensor([t.state for t in self.memory]).float()
        action = torch.LongTensor([t.action for t in self.memory]).view(-1, 1).long()
        reward = torch.tensor([t.reward for t in self.memory]).float()
        next_state = torch.tensor([t.next_state for t in self.memory]).float()

        reward = (reward - reward.mean()) / (reward.std() + 1e-7)
        with torch.no_grad():
            target_v = reward + self.gamma * self.target_net(next_state).max(1)[0]

        # Update...
        for index in BatchSampler(SubsetRandomSampler(range(len(self.memory))),
                                  batch_size=self.batch_size, drop_last=False):
            v = (self.act_net(state).gather(1, action))[index]
            loss = self.loss_func(target_v[index].unsqueeze(1), (self.act_net(state).gather(1, action))[index])
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.writer.add_scalar('loss/value_loss', loss, self.update_count)
            self.update_count += 1
            if self.update_count % 100 == 0:
                self.target_net.load_state_dict(self.act_net.state_dict())
    else:
        print("Memory Buff is too less")
Example #14
Source File: DQN_MountainCar-v0.py From Deep-reinforcement-learning-with-pytorch with MIT License | 6 votes |
def update(self):
    if self.memory_count >= self.capacity:
        state = torch.tensor([t.state for t in self.memory]).float()
        action = torch.LongTensor([t.action for t in self.memory]).view(-1, 1).long()
        reward = torch.tensor([t.reward for t in self.memory]).float()
        next_state = torch.tensor([t.next_state for t in self.memory]).float()

        reward = (reward - reward.mean()) / (reward.std() + 1e-7)
        with torch.no_grad():
            target_v = reward + self.gamma * self.target_net(next_state).max(1)[0]

        # Update...
        for index in BatchSampler(SubsetRandomSampler(range(len(self.memory))),
                                  batch_size=self.batch_size, drop_last=False):
            v = (self.act_net(state).gather(1, action))[index]
            loss = self.loss_func(target_v[index].unsqueeze(1), (self.act_net(state).gather(1, action))[index])
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.writer.add_scalar('loss/value_loss', loss, self.update_count)
            self.update_count += 1
            if self.update_count % 100 == 0:
                self.target_net.load_state_dict(self.act_net.state_dict())
    else:
        print("Memory Buff is too less")
Example #15
Source File: samplers.py From Multilingual_Text_to_Speech with MIT License | 6 votes |
def __init__(self, data_source, languages, batch_size, data_parallel_devices=1, shuffle=True, drop_last=False):
    assert batch_size % (len(languages) * data_parallel_devices) == 0, (
        'Batch size must be divisible by number of languages times the number of data parallel devices (if enabled).')

    label_indices = {}
    for idx in range(len(data_source)):
        label = data_source.items[idx]['language']
        if label not in label_indices:
            label_indices[label] = []
        label_indices[label].append(idx)

    if shuffle:
        self._samplers = [SubsetRandomSampler(label_indices[i]) for i, _ in enumerate(languages)]
    else:
        self._samplers = [SubsetSampler(label_indices[i]) for i, _ in enumerate(languages)]

    self._batch_size = batch_size
    self._drop_last = drop_last
    self._dp_devices = data_parallel_devices
Example #16
Source File: ppo.py From marl_transfer with MIT License | 6 votes |
def magent_feed_forward_generator(rollouts_list, advantages_list, num_mini_batch):
    num_steps, num_processes = rollouts_list[0].rewards.size()[0:2]
    batch_size = num_processes * num_steps
    mini_batch_size = int((batch_size / num_mini_batch))  # size of minibatch for each agent
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = torch.cat([rollout.obs[:-1].view(-1, *rollout.obs.size()[2:])[indices]
                               for rollout in rollouts_list], 0)
        recurrent_hidden_states_batch = torch.cat(
            [rollout.recurrent_hidden_states[:-1].view(-1, rollout.recurrent_hidden_states.size(-1))[indices]
             for rollout in rollouts_list], 0)
        actions_batch = torch.cat([rollout.actions.view(-1, rollout.actions.size(-1))[indices]
                                   for rollout in rollouts_list], 0)
        value_preds_batch = torch.cat([rollout.value_preds[:-1].view(-1, 1)[indices]
                                       for rollout in rollouts_list], 0)
        return_batch = torch.cat([rollout.returns[:-1].view(-1, 1)[indices]
                                  for rollout in rollouts_list], 0)
        masks_batch = torch.cat([rollout.masks[:-1].view(-1, 1)[indices]
                                 for rollout in rollouts_list], 0)
        old_action_log_probs_batch = torch.cat([rollout.action_log_probs.view(-1, 1)[indices]
                                                for rollout in rollouts_list], 0)
        adv_targ = torch.cat([advantages.view(-1, 1)[indices] for advantages in advantages_list], 0)

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, value_preds_batch, return_batch, \
            masks_batch, old_action_log_probs_batch, adv_targ
Example #17
Source File: create_image_dataloader.py From Auto-PyTorch with Apache License 2.0 | 5 votes |
def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices, train_transform, valid_transform, dataset_info):
    # if len(X.shape) > 1:
    #     return super(CreateImageDataLoader, self).fit(pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices)

    torch.manual_seed(pipeline_config["random_seed"])
    hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

    if dataset_info.default_dataset:
        train_dataset = dataset_info.default_dataset(root=pipeline_config['default_dataset_download_dir'],
                                                     train=True, download=True, transform=train_transform)
        if valid_indices is not None:
            valid_dataset = dataset_info.default_dataset(root=pipeline_config['default_dataset_download_dir'],
                                                         train=True, download=True, transform=valid_transform)
    elif len(X.shape) > 1:
        train_dataset = XYDataset(X, Y, transform=train_transform, target_transform=lambda y: y.astype(np.int64))
        valid_dataset = XYDataset(X, Y, transform=valid_transform, target_transform=lambda y: y.astype(np.int64))
    else:
        train_dataset = ImageFilelist(X, Y, transform=train_transform,
                                      target_transform=lambda y: y.astype(np.int64),
                                      cache_size=pipeline_config['dataloader_cache_size_mb'] * 1000,
                                      image_size=dataset_info.x_shape[2:])
        if valid_indices is not None:
            valid_dataset = ImageFilelist(X, Y, transform=valid_transform,
                                          target_transform=lambda y: y.astype(np.int64),
                                          cache_size=0,
                                          image_size=dataset_info.x_shape[2:])
            valid_dataset.cache = train_dataset.cache

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=int(hyperparameter_config['batch_size']),
        sampler=SubsetRandomSampler(train_indices),
        drop_last=True,
        pin_memory=True,
        num_workers=pipeline_config['dataloader_worker'])

    valid_loader = None
    if valid_indices is not None:
        valid_loader = DataLoader(
            dataset=valid_dataset,
            batch_size=int(hyperparameter_config['batch_size']),
            sampler=SubsetRandomSampler(valid_indices),
            drop_last=False,
            pin_memory=True,
            num_workers=pipeline_config['dataloader_worker'])

    return {'train_loader': train_loader, 'valid_loader': valid_loader, 'batch_size': hyperparameter_config['batch_size']}
Example #18
Source File: storage.py From bezos with MIT License | 5 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    # The number of experiences we have in memory
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "* number of steps ({}) = {} "
        "to be greater than or equal to the number of PPO mini batches ({})."
        "".format(num_processes, num_steps, num_processes * num_steps, num_mini_batch))
    # Split the batch into multiple mini batches
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(
        range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        # indices is a list of indices for that mini batch.
        # We don't take the last observation, mask, value_pred, return and hxs.
        # .view(-1, *self.obs.size()[2:]) compresses T (the number of timesteps) and
        # N (the number of parallel agents running) into the same dimension
        # (our learner doesn't care about that).
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        # We don't provide the last hidden state because it is not necessary for the
        # feedforward pass. We compress T and N again.
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        # Same thing for these values, these are just scalars for every N, T pair.
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        # This advantage is computed at the end of every episode.
        adv_targ = advantages.view(-1, 1)[indices]

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            value_preds_batch, return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #19
Source File: _sampler.py From tape with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __iter__(self):
    for bucket in self.bucket_sampler:
        sorted_sampler = SortedSampler(self.dataset, self.sort_key, indices=bucket)
        for batch in SubsetRandomSampler(
                list(BatchSampler(sorted_sampler, self.batch_size, self.drop_last))):
            yield batch
Example #20
Source File: train.py From srgan with MIT License | 5 votes |
def maybe_get_subset_sampler(num_samples, dataset):
    if num_samples is None or num_samples == 0:
        return None
    if num_samples > len(dataset):
        raise ValueError(('Requesting subset of {} samples, but '
                          'dataset has only {}').format(num_samples, len(dataset)))
    from torch.utils.data.sampler import SubsetRandomSampler
    return SubsetRandomSampler(range(num_samples))
Example #21
Source File: lstm_enc_dec_axl.py From DeepADoTS with MIT License | 5 votes |
def fit(self, X: pd.DataFrame):
    X.interpolate(inplace=True)
    X.bfill(inplace=True)
    data = X.values
    sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
    indices = np.random.permutation(len(sequences))
    split_point = int(self.train_gaussian_percentage * len(sequences))
    train_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                              sampler=SubsetRandomSampler(indices[:-split_point]), pin_memory=True)
    train_gaussian_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                       sampler=SubsetRandomSampler(indices[-split_point:]), pin_memory=True)

    self.lstmed = LSTMEDModule(X.shape[1], self.hidden_size,
                               self.n_layers, self.use_bias, self.dropout,
                               seed=self.seed, gpu=self.gpu)
    self.to_device(self.lstmed)
    optimizer = torch.optim.Adam(self.lstmed.parameters(), lr=self.lr)

    self.lstmed.train()
    for epoch in trange(self.num_epochs):
        logging.debug(f'Epoch {epoch+1}/{self.num_epochs}.')
        for ts_batch in train_loader:
            output = self.lstmed(self.to_var(ts_batch))
            loss = nn.MSELoss(size_average=False)(output, self.to_var(ts_batch.float()))
            self.lstmed.zero_grad()
            loss.backward()
            optimizer.step()

    self.lstmed.eval()
    error_vectors = []
    for ts_batch in train_gaussian_loader:
        output = self.lstmed(self.to_var(ts_batch))
        error = nn.L1Loss(reduce=False)(output, self.to_var(ts_batch.float()))
        error_vectors += list(error.view(-1, X.shape[1]).data.cpu().numpy())

    self.mean = np.mean(error_vectors, axis=0)
    self.cov = np.cov(error_vectors, rowvar=False)
Example #22
Source File: autoencoder.py From DeepADoTS with MIT License | 5 votes |
def fit(self, X: pd.DataFrame):
    X.interpolate(inplace=True)
    X.bfill(inplace=True)
    data = X.values
    sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
    indices = np.random.permutation(len(sequences))
    split_point = int(self.train_gaussian_percentage * len(sequences))
    train_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                              sampler=SubsetRandomSampler(indices[:-split_point]), pin_memory=True)
    train_gaussian_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                       sampler=SubsetRandomSampler(indices[-split_point:]), pin_memory=True)

    self.aed = AutoEncoderModule(X.shape[1], self.sequence_length, self.hidden_size, seed=self.seed, gpu=self.gpu)
    self.to_device(self.aed)  # .double()
    optimizer = torch.optim.Adam(self.aed.parameters(), lr=self.lr)

    self.aed.train()
    for epoch in trange(self.num_epochs):
        logging.debug(f'Epoch {epoch+1}/{self.num_epochs}.')
        for ts_batch in train_loader:
            output = self.aed(self.to_var(ts_batch))
            loss = nn.MSELoss(size_average=False)(output, self.to_var(ts_batch.float()))
            self.aed.zero_grad()
            loss.backward()
            optimizer.step()

    self.aed.eval()
    error_vectors = []
    for ts_batch in train_gaussian_loader:
        output = self.aed(self.to_var(ts_batch))
        error = nn.L1Loss(reduce=False)(output, self.to_var(ts_batch.float()))
        error_vectors += list(error.view(-1, X.shape[1]).data.cpu().numpy())

    self.mean = np.mean(error_vectors, axis=0)
    self.cov = np.cov(error_vectors, rowvar=False)
Example #23
Source File: storage.py From carla-rl with MIT License | 5 votes |
def feed_forward_generator(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "* number of steps ({}) = {} "
        "to be greater than or equal to the number of PPO mini batches ({})."
        "".format(num_processes, num_steps, num_processes * num_steps, num_mini_batch))
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = {}
        self.obs = obs_to_dict(self.obs)
        for k in self.obs:
            obs_batch[k] = self.obs[k][:-1].view(-1, *self.obs[k].size()[2:])[indices]
        self.obs = dict_to_obs(self.obs)
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            value_preds_batch, return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #24
Source File: datautils.py From semi-supervised-pytorch with MIT License | 5 votes |
def get_mnist(location="./", batch_size=64, labels_per_class=100):
    from functools import reduce
    from operator import __or__
    from torch.utils.data.sampler import SubsetRandomSampler
    from torchvision.datasets import MNIST
    import torchvision.transforms as transforms
    from utils import onehot

    flatten_bernoulli = lambda x: transforms.ToTensor()(x).view(-1).bernoulli()

    mnist_train = MNIST(location, train=True, download=True,
                        transform=flatten_bernoulli, target_transform=onehot(n_labels))
    mnist_valid = MNIST(location, train=False, download=True,
                        transform=flatten_bernoulli, target_transform=onehot(n_labels))

    def get_sampler(labels, n=None):
        # Only choose digits in n_labels
        (indices,) = np.where(reduce(__or__, [labels == i for i in np.arange(n_labels)]))

        # Ensure uniform distribution of labels
        np.random.shuffle(indices)
        indices = np.hstack([list(filter(lambda idx: labels[idx] == i, indices))[:n]
                             for i in range(n_labels)])

        indices = torch.from_numpy(indices)
        sampler = SubsetRandomSampler(indices)
        return sampler

    # Dataloaders for MNIST
    labelled = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, num_workers=2, pin_memory=cuda,
                                           sampler=get_sampler(mnist_train.train_labels.numpy(), labels_per_class))
    unlabelled = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, num_workers=2, pin_memory=cuda,
                                             sampler=get_sampler(mnist_train.train_labels.numpy()))
    validation = torch.utils.data.DataLoader(mnist_valid, batch_size=batch_size, num_workers=2, pin_memory=cuda,
                                             sampler=get_sampler(mnist_valid.test_labels.numpy()))

    return labelled, unlabelled, validation
Example #25
Source File: create_dataloader.py From Auto-PyTorch with Apache License 2.0 | 5 votes |
def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices):
    torch.manual_seed(pipeline_config["random_seed"])
    hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

    # prepare data
    drop_last = hyperparameter_config['batch_size'] < train_indices.shape[0]
    X, Y = to_dense(X), to_dense(Y)
    X, Y = torch.from_numpy(X).float(), torch.from_numpy(Y)

    train_dataset = TensorDataset(X, Y)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=hyperparameter_config['batch_size'],
        sampler=SubsetRandomSampler(train_indices),
        shuffle=False,
        drop_last=drop_last)

    valid_loader = None
    if valid_indices is not None:
        valid_loader = DataLoader(
            dataset=Subset(train_dataset, valid_indices),
            batch_size=hyperparameter_config['batch_size'],
            shuffle=False,
            drop_last=False)

    return {'train_loader': train_loader, 'valid_loader': valid_loader, 'batch_size': hyperparameter_config['batch_size']}
Example #26
Source File: base_data_loader.py From vae-audio with MIT License | 5 votes |
def _split_sampler(self, split):
    if split == 0.0:
        return None, None

    idx_full = np.arange(self.n_samples)

    np.random.seed(0)
    np.random.shuffle(idx_full)

    if isinstance(split, int):
        assert split > 0
        assert split < self.n_samples, "validation set size is configured to be larger than entire dataset."
        len_valid = split
    else:
        len_valid = int(self.n_samples * split)

    valid_idx = idx_full[0:len_valid]
    train_idx = np.delete(idx_full, np.arange(0, len_valid))

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # turn off shuffle option which is mutually exclusive with sampler
    self.shuffle = False
    self.n_samples = len(train_idx)

    return train_sampler, valid_sampler
Example #27
Source File: GMVAE.py From GMVAE with MIT License | 5 votes |
def reconstruct_data(self, data_loader, sample_size=-1):
    """Reconstruct Data

    Args:
        data_loader: (DataLoader) loader containing the data
        sample_size: (int) size of random data to consider from data_loader

    Returns:
        reconstructed: (array) array containing the reconstructed data
    """
    self.network.eval()

    # sample random data from loader
    indices = np.random.randint(0, len(data_loader.dataset), size=sample_size)
    test_random_loader = torch.utils.data.DataLoader(data_loader.dataset, batch_size=sample_size,
                                                     sampler=SubsetRandomSampler(indices))

    # obtain values
    it = iter(test_random_loader)
    test_batch_data, _ = it.next()
    original = test_batch_data.data.numpy()
    if self.cuda:
        test_batch_data = test_batch_data.cuda()

    # obtain reconstructed data
    out = self.network(test_batch_data, self.gumbel_temp, self.hard_gumbel)
    reconstructed = out['x_rec']
    return original, reconstructed.data.cpu().numpy()
Example #28
Source File: helpers.py From Hash-Embeddings with MIT License | 5 votes |
def train_valid_load(dataset, validSize=0.1, isShuffle=True, seed=123, **kwargs):
    r"""Utility to split a training set into a validation and a training one.

    Note:
        This shouldn't be used if the train and test data are preprocessed differently,
        e.g. if you use dropout or a dictionary for word embeddings.

    Args:
        dataset (torch.utils.data.Dataset): Dataset to split.
        validSize (float, optional): Percentage to keep for the validation set. In [0,1].
        isShuffle (bool, optional): Whether to shuffle before splitting.
        seed (int, optional): Sets the seed for generating random numbers.
        kwargs: Additional arguments to the `DataLoaders`.

    Returns:
        The train and the valid DataLoader, respectively.
    """
    assert 0 <= validSize <= 1, "validSize:{}. Should be in [0,1]".format(validSize)
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    if validSize == 0:
        return DataLoader(dataset, **kwargs), iter(())

    nTrain = len(dataset)
    idcs = np.arange(nTrain)
    splitIdx = int(validSize * nTrain)
    if isShuffle:
        np.random.shuffle(idcs)

    trainIdcs, validIdcs = idcs[splitIdx:], idcs[:splitIdx]

    trainSampler = SubsetRandomSampler(trainIdcs)
    validSampler = SubsetRandomSampler(validIdcs)

    trainLoader = DataLoader(dataset, sampler=trainSampler, **kwargs)
    validLoader = DataLoader(dataset, sampler=validSampler, **kwargs)

    return trainLoader, validLoader
Example #29
Source File: train_source.py From pytorch-domain-adaptation with MIT License | 5 votes |
def create_dataloaders(batch_size):
    dataset = MNIST(config.DATA_DIR/'mnist', train=True, download=True,
                    transform=Compose([GrayscaleToRgb(), ToTensor()]))
    shuffled_indices = np.random.permutation(len(dataset))
    train_idx = shuffled_indices[:int(0.8*len(dataset))]
    val_idx = shuffled_indices[int(0.8*len(dataset)):]

    train_loader = DataLoader(dataset, batch_size=batch_size, drop_last=True,
                              sampler=SubsetRandomSampler(train_idx),
                              num_workers=1, pin_memory=True)
    val_loader = DataLoader(dataset, batch_size=batch_size, drop_last=False,
                            sampler=SubsetRandomSampler(val_idx),
                            num_workers=1, pin_memory=True)
    return train_loader, val_loader
Example #30
Source File: mnist_sslvae.py From semi-supervised-pytorch with MIT License | 5 votes |
def get_mnist(location="./", batch_size=64, labels_per_class=100):
    from functools import reduce
    from operator import __or__
    from torch.utils.data.sampler import SubsetRandomSampler
    from torchvision.datasets import MNIST
    import torchvision.transforms as transforms
    from utils import onehot

    flatten_bernoulli = lambda x: transforms.ToTensor()(x).view(-1).bernoulli()

    mnist_train = MNIST(location, train=True, download=True,
                        transform=flatten_bernoulli, target_transform=onehot(n_labels))
    mnist_valid = MNIST(location, train=False, download=True,
                        transform=flatten_bernoulli, target_transform=onehot(n_labels))

    def get_sampler(labels, n=None):
        # Only choose digits in n_labels
        (indices,) = np.where(reduce(__or__, [labels == i for i in np.arange(n_labels)]))

        # Ensure uniform distribution of labels
        np.random.shuffle(indices)
        indices = np.hstack([list(filter(lambda idx: labels[idx] == i, indices))[:n]
                             for i in range(n_labels)])

        indices = torch.from_numpy(indices)
        sampler = SubsetRandomSampler(indices)
        return sampler

    # Dataloaders for MNIST
    labelled = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, num_workers=2, pin_memory=cuda,
                                           sampler=get_sampler(mnist_train.train_labels.numpy(), labels_per_class))
    unlabelled = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, num_workers=2, pin_memory=cuda,
                                             sampler=get_sampler(mnist_train.train_labels.numpy()))
    validation = torch.utils.data.DataLoader(mnist_valid, batch_size=batch_size, num_workers=2, pin_memory=cuda,
                                             sampler=get_sampler(mnist_valid.test_labels.numpy()))

    return labelled, unlabelled, validation