Python torch.utils.data.sampler.WeightedRandomSampler() Examples
The following are 14 code examples of torch.utils.data.sampler.WeightedRandomSampler().
Each example links to its original project and source file. You may also want to check out all available functions and classes of the module torch.utils.data.sampler.
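Before turning to the project examples, here is a minimal, self-contained sketch of the basic pattern (the toy dataset and weights are made up for illustration): the sampler draws indices with probability proportional to the per-sample weights you pass in, and a DataLoader consumes it directly.

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import WeightedRandomSampler

# Toy dataset: 100 samples, the first 10 carry 10x the sampling weight.
data = TensorDataset(torch.randn(100, 3), torch.zeros(100))
weights = torch.ones(100)
weights[:10] = 10.0

# Draw 100 indices per epoch, with replacement, proportional to `weights`.
sampler = WeightedRandomSampler(weights, num_samples=100, replacement=True)
loader = DataLoader(data, batch_size=10, sampler=sampler)

for features, targets in loader:
    pass  # batches are biased toward the heavily weighted samples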
Example #1
Source File: clustering.py From torchsupport with MIT License
def train(self):
    for epoch_id in range(self.max_epochs):
        self.epoch_id = epoch_id
        embedding = self.embed_all()
        weights, labels, centers = self.cluster(embedding)
        self.each_cluster(embedding, labels)
        self.data.labels = labels
        self.train_data = None
        self.train_data = DataLoader(
            self.data, batch_size=self.batch_size, num_workers=8,
            sampler=WeightedRandomSampler(weights, len(self.data) * 4, replacement=True)
        )
        for data, label in self.train_data:
            self.step(data, label, centers)
            if self.step_id % self.checkpoint_interval == 0:
                self.checkpoint()
            self.step_id += 1
    return self.net
Example #2
Source File: __init__.py From ignite with BSD 3-Clause "New" or "Revised" License
def setup_sampler(sampler_type, num_iters, batch_size):
    if sampler_type is None:
        return None, batch_size

    if sampler_type == "weighted":
        from torch.utils.data.sampler import WeightedRandomSampler

        w = torch.ones(num_iters * batch_size, dtype=torch.float)
        for i in range(num_iters):
            w[batch_size * i : batch_size * (i + 1)] += i * 1.0
        return WeightedRandomSampler(w, num_samples=num_iters * batch_size, replacement=True), batch_size

    if sampler_type == "distributed":
        from torch.utils.data.distributed import DistributedSampler
        import torch.distributed as dist

        num_replicas = 1
        rank = 0
        if dist.is_available() and dist.is_initialized():
            num_replicas = dist.get_world_size()
            rank = dist.get_rank()
        dataset = torch.zeros(num_iters * batch_size)
        return DistributedSampler(dataset, num_replicas=num_replicas, rank=rank), batch_size // num_replicas
Example #3
Source File: test_auto.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_dist_proxy_sampler():
    import torch
    from torch.utils.data import WeightedRandomSampler

    weights = torch.ones(100)
    weights[:50] += 1
    num_samples = 100
    sampler = WeightedRandomSampler(weights, num_samples)

    num_replicas = 4
    dist_samplers = [DistributedProxySampler(sampler, num_replicas=num_replicas, rank=i) for i in range(num_replicas)]

    torch.manual_seed(0)
    true_indices = list(sampler)

    indices_per_rank = []
    for s in dist_samplers:
        s.set_epoch(0)
        indices_per_rank += list(s)

    assert set(indices_per_rank) == set(true_indices)
Example #4
Source File: clustering.py From torchsupport with MIT License
def train(self):
    expectation, embedding = self.expectation()
    weights, labels, centers = self.cluster(embedding)
    self.data.labels = torch.zeros_like(expectation)
    self.data.labels[expectation.argmax(dim=1)] = 1
    for epoch_id in range(self.max_epochs):
        self.epoch_id = epoch_id
        self.train_data = None
        self.train_data = DataLoader(
            self.data, batch_size=self.batch_size, num_workers=8,
            sampler=WeightedRandomSampler(weights, len(self.data) * 4, replacement=True)
        )
        for data, expected_logits in self.train_data:
            self.step(data, expected_logits, centers)
            self.step_id += 1
        expectation, embedding = self.expectation()
        labels = expectation.argmax(dim=1).to("cpu").squeeze()
        self.each_cluster(
            expectation.to("cpu"),
            labels.numpy()
        )
        self.data.labels = expectation.to("cpu").squeeze()
    return self.net
Example #5
Source File: clustering.py From torchsupport with MIT License
def train(self):
    for epoch_id in range(self.max_epochs):
        self.epoch_id = epoch_id
        embedding = self.embed_all()
        label_hierarchy = []
        center_hierarchy = []
        for clustering in self.clusterings:
            self.clustering = clustering
            weights, labels, centers = self.cluster(embedding)
            label_hierarchy.append(np.expand_dims(labels, axis=1))
            center_hierarchy.append(centers)
        self.each_cluster(embedding, label_hierarchy)
        label_hierarchy = np.concatenate(label_hierarchy, axis=1)
        self.data.labels = label_hierarchy
        self.train_data = None
        self.train_data = DataLoader(
            self.data, batch_size=self.batch_size, num_workers=0,
            sampler=WeightedRandomSampler(weights, min(20000, len(self.data)), replacement=True)
        )
        for inner_epoch in range(1):
            for data, label in self.train_data:
                self.step(data, label, center_hierarchy)
                if self.step_id % self.checkpoint_interval == 0:
                    self.checkpoint()
                self.step_id += 1
    return self.net
Example #6
Source File: __init__.py From margipose with Apache License 2.0
def sampler(self, examples_per_epoch=None):
    total_length = len(self)
    if examples_per_epoch is None:
        examples_per_epoch = total_length
    # Sample with replacement only if we have to
    replacement = examples_per_epoch > total_length
    return WeightedRandomSampler(
        torch.ones(total_length).double(),
        examples_per_epoch,
        replacement=replacement
    )
Example #7
Source File: test_auto.py From ignite with BSD 3-Clause "New" or "Revised" License
def _test_auto_dataloader(ws, nproc, batch_size, num_workers=1, sampler_name=None, dl_type=DataLoader):
    data = torch.rand(100, 3, 12, 12)

    if sampler_name is None:
        sampler = None
    elif sampler_name == "WeightedRandomSampler":
        sampler = WeightedRandomSampler(weights=torch.ones(100), num_samples=100)
    else:
        raise RuntimeError("Unknown sampler name: {}".format(sampler_name))

    # Test auto_dataloader
    assert idist.get_world_size() == ws
    dataloader = auto_dataloader(
        data, batch_size=batch_size, num_workers=num_workers, sampler=sampler, shuffle=sampler is None
    )
    assert isinstance(dataloader, dl_type)

    if hasattr(dataloader, "_loader"):
        dataloader = dataloader._loader

    if ws < batch_size:
        assert dataloader.batch_size == batch_size // ws
    else:
        assert dataloader.batch_size == batch_size

    if ws <= num_workers:
        assert dataloader.num_workers == (num_workers + nproc - 1) // nproc
    else:
        assert dataloader.num_workers == num_workers

    if ws < 2:
        sampler_type = RandomSampler if sampler is None else type(sampler)
        assert isinstance(dataloader.sampler, sampler_type)
    else:
        sampler_type = DistributedSampler if sampler is None else DistributedProxySampler
        assert isinstance(dataloader.sampler, sampler_type)

    if isinstance(dataloader, DataLoader):
        assert dataloader.pin_memory == ("cuda" in idist.device().type)
Example #8
Source File: test_auto.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_auto_methods_no_dist():
    _test_auto_dataloader(1, 1, batch_size=1)
    _test_auto_dataloader(1, 1, batch_size=10, num_workers=10)
    _test_auto_dataloader(1, 1, batch_size=10, sampler_name="WeightedRandomSampler")

    _test_auto_model_optimizer(1, "cpu")
Example #9
Source File: test_auto.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_auto_methods_gloo(distributed_context_single_node_gloo):
    ws = distributed_context_single_node_gloo["world_size"]
    _test_auto_dataloader(ws=ws, nproc=ws, batch_size=1)
    _test_auto_dataloader(ws=ws, nproc=ws, batch_size=10, num_workers=10)
    _test_auto_dataloader(ws=ws, nproc=ws, batch_size=10, sampler_name="WeightedRandomSampler")

    _test_auto_model_optimizer(ws, "cpu")
Example #10
Source File: test_auto.py From ignite with BSD 3-Clause "New" or "Revised" License
def test_auto_methods_nccl(distributed_context_single_node_nccl):
    ws = distributed_context_single_node_nccl["world_size"]
    lrank = distributed_context_single_node_nccl["local_rank"]
    _test_auto_dataloader(ws=ws, nproc=ws, batch_size=1)
    _test_auto_dataloader(ws=ws, nproc=ws, batch_size=10, num_workers=10)
    _test_auto_dataloader(ws=ws, nproc=ws, batch_size=1, sampler_name="WeightedRandomSampler")

    device = "cuda"
    _test_auto_model_optimizer(ws, device)
Example #11
Source File: sklearn_api.py From pt-avitm with MIT License
def fit(self, X, y=None) -> None:
    documents, features = X.shape
    ds = CountTensorDataset(X.astype(np.float32))
    self.autoencoder = ProdLDA(
        in_dimension=features,
        hidden1_dimension=self.hidden1_dimension,
        hidden2_dimension=self.hidden2_dimension,
        topics=self.topics,
    )
    if self.cuda:
        self.autoencoder.cuda()
    ae_optimizer = Adam(
        self.autoencoder.parameters(), lr=self.lr, betas=(0.99, 0.999)
    )
    train(
        ds,
        self.autoencoder,
        cuda=self.cuda,
        validation=None,
        epochs=self.epochs,
        batch_size=self.batch_size,
        optimizer=ae_optimizer,
        sampler=WeightedRandomSampler(
            torch.ones(documents), min(documents, self.samples)
        ),
        silent=True,
        num_workers=0,  # TODO causes a bug to change this on Mac
    )
Example #12
Source File: samplers.py From Multilingual_Text_to_Speech with MIT License
def __init__(self, data_source):
    lebel_freq = {}
    for idx in range(len(data_source)):
        label = data_source.items[idx]['language']
        if label in lebel_freq:
            lebel_freq[label] += 1
        else:
            lebel_freq[label] = 1

    total = float(sum(lebel_freq.values()))
    weights = [total / lebel_freq[data_source.items[idx]['language']] for idx in range(len(data_source))]
    self._sampler = WeightedRandomSampler(weights, len(weights))
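The per-language counting above can be written more compactly with collections.Counter. The following is a minimal sketch, assuming data_source.items is a list of dicts with a 'language' key as in the example; make_balanced_sampler is a hypothetical helper, not part of the Multilingual_Text_to_Speech project.

from collections import Counter
from torch.utils.data.sampler import WeightedRandomSampler

def make_balanced_sampler(items, key='language'):
    # Weight each item by the inverse frequency of its label so that
    # every label is drawn roughly equally often.
    counts = Counter(item[key] for item in items)
    total = float(sum(counts.values()))
    weights = [total / counts[item[key]] for item in items]
    return WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)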
Example #13
Source File: dataset_utils.py From imgclsmob with MIT License
def get_train_data_source(ds_metainfo, batch_size, num_workers):
    """
    Get data source for training subset.

    Parameters
    ----------
    ds_metainfo : DatasetMetaInfo
        Dataset metainfo.
    batch_size : int
        Batch size.
    num_workers : int
        Number of background workers.

    Returns
    -------
    DataLoader
        Data source.
    """
    transform_train = ds_metainfo.train_transform(ds_metainfo=ds_metainfo)
    kwargs = ds_metainfo.dataset_class_extra_kwargs if ds_metainfo.dataset_class_extra_kwargs is not None else {}
    dataset = ds_metainfo.dataset_class(
        root=ds_metainfo.root_dir_path,
        mode="train",
        transform=transform_train,
        **kwargs)
    ds_metainfo.update_from_dataset(dataset)
    if not ds_metainfo.train_use_weighted_sampler:
        return DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            pin_memory=True)
    else:
        sampler = WeightedRandomSampler(
            weights=dataset.sample_weights,
            num_samples=len(dataset))
        return DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            # shuffle=True,
            sampler=sampler,
            num_workers=num_workers,
            pin_memory=True)
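The commented-out shuffle=True in the weighted-sampler branch is deliberate: DataLoader treats the sampler and shuffle options as mutually exclusive, so shuffling must be left at its default when a sampler is supplied. A minimal sketch of the intended call, reusing the names from the example above:

# Passing a sampler means shuffle must stay at its default (False);
# combining sampler with shuffle=True makes DataLoader raise an error.
loader = DataLoader(dataset=dataset, batch_size=batch_size, sampler=sampler,
                    num_workers=num_workers, pin_memory=True)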
Example #14
Source File: train.py From graphx-conv with MIT License
def train_valid(data_root, name, img_enc, pc_enc, pc_dec, optimizer, scheduler, adain=True, projection=True,
                decimation=None, color_img=False, n_points=250, bs=4, lr=5e-5, weight_decay=1e-5, gamma=.3,
                milestones=(5, 8), n_epochs=10, print_freq=1000, val_freq=10000, checkpoint_folder=None):
    if decimation is not None:
        pc_dec = partial(pc_dec, decimation=decimation)

    net = PointcloudDeformNet((bs,) + (3 if color_img else 1, 224, 224), (bs, n_points, 3), img_enc, pc_enc,
                              pc_dec, adain=adain, projection=projection,
                              optimizer=lambda x: optimizer(x, lr, weight_decay=weight_decay),
                              scheduler=lambda x: scheduler(x, milestones=milestones, gamma=gamma),
                              weight_decay=None)
    print(net)

    train_data = ShapeNet(path=data_root, grayscale=not color_img, type='train', n_points=n_points)
    sampler = WeightedRandomSampler(train_data.sample_weights, len(train_data), True)
    train_loader = DataLoader(train_data, batch_size=bs, num_workers=1, collate_fn=collate, drop_last=True,
                              sampler=sampler)

    val_data = ShapeNet(path=data_root, grayscale=not color_img, type='valid',
                        num_vals=10 * len(os.listdir(data_root)), n_points=n_points)
    val_loader = DataLoader(val_data, batch_size=bs, shuffle=False, num_workers=1, collate_fn=collate,
                            drop_last=True)

    if checkpoint_folder is None:
        mon = nnt.Monitor(name, print_freq=print_freq, num_iters=len(train_data) // bs, use_tensorboard=True)
        mon.copy_files(backup_files)
        mon.dump_rep('network', net)
        mon.dump_rep('optimizer', net.optim['optimizer'])
        if net.optim['scheduler']:
            mon.dump_rep('scheduler', net.optim['scheduler'])

        states = {
            'model_state_dict': net.state_dict(),
            'opt_state_dict': net.optim['optimizer'].state_dict()
        }
        if net.optim['scheduler']:
            states['scheduler_state_dict'] = net.optim['scheduler'].state_dict()

        mon.schedule(mon.dump, beginning=False, name='training.pt', obj=states, type='torch', keep=5)
        print('Training...')
    else:
        mon = nnt.Monitor(current_folder=checkpoint_folder, print_freq=print_freq,
                          num_iters=len(train_data) // bs, use_tensorboard=True)
        states = mon.load('training.pt', type='torch')
        mon.set_iter(mon.get_epoch() * len(train_data) // bs)
        net.load_state_dict(states['model_state_dict'])
        net.optim['optimizer'].load_state_dict(states['opt_state_dict'])
        if net.optim['scheduler']:
            net.optim['scheduler'].load_state_dict(states['scheduler_state_dict'])
        print('Resume from epoch %d...' % mon.get_epoch())

    mon.run_training(net, train_loader, n_epochs, val_loader, valid_freq=val_freq, reduce='mean')
    print('Training finished!')