Python torch.utils.data.DataLoader() Examples
The following are 29 code examples of torch.utils.data.DataLoader(), collected from open-source projects. Each example lists its original project and source file, so you can trace the code back to its full context. You may also want to check out the other functions and classes of the torch.utils.data module.
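Before the examples, here is a minimal, self-contained sketch of the pattern they all share: wrap a dataset in a DataLoader, then iterate over batches. The TensorDataset and random tensors below are placeholders, not taken from any of the projects that follow.

import torch
from torch.utils.data import DataLoader, TensorDataset

# toy data: 100 samples with 8 features each, plus binary labels
features = torch.randn(100, 8)
labels = torch.randint(0, 2, (100,))
dataset = TensorDataset(features, labels)

# batches of 16, reshuffled at the start of every epoch
loader = DataLoader(dataset, batch_size=16, shuffle=True)

for X, y in loader:
    print(X.shape, y.shape)  # torch.Size([16, 8]) torch.Size([16]); the final batch holds 4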
Example #1
Source File: 48_fine_tune_hotdog.py From deep-learning-note with MIT License
def train_fine_tuning(net, optimizer, batch_size=128, num_epochs=4):
    train_iter = DataLoader(ImageFolder(os.path.join(data_dir, 'train'), transform=train_augs),
                            batch_size, shuffle=True)
    test_iter = DataLoader(ImageFolder(os.path.join(data_dir, 'test'), transform=test_augs),
                           batch_size)
    loss = torch.nn.CrossEntropyLoss()
    utils.train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)
Example #2
Source File: segmentation.py From steppy-toolkit with MIT License
def get_datagen(self, X, y, train_mode, loader_params):
    if train_mode:
        dataset = self.dataset(X, y,
                               train_mode=True,
                               image_augment=self.image_augment_train,
                               image_augment_with_target=self.image_augment_with_target_train,
                               mask_transform=self.mask_transform,
                               image_transform=self.image_transform,
                               image_source=self.dataset_params.image_source)
    else:
        dataset = self.dataset(X, y,
                               train_mode=False,
                               image_augment=self.image_augment_inference,
                               image_augment_with_target=self.image_augment_with_target_inference,
                               mask_transform=self.mask_transform,
                               image_transform=self.image_transform,
                               image_source=self.dataset_params.image_source)

    datagen = DataLoader(dataset, **loader_params)
    steps = len(datagen)
    return datagen, steps
Example #3
Source File: train.py From pytorch-multigpu with MIT License
def main():
    best_acc = 0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    dataset_train = CIFAR10(root='../data', train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')
    net = pyramidnet()
    net = nn.DataParallel(net)
    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # optimizer = optim.SGD(net.parameters(), lr=args.lr,
    #                       momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, device)
Example #4
Source File: get_data_loader.py From DDPAE-video-prediction with MIT License
def get_data_loader(opt):
    if opt.dset_name == 'moving_mnist':
        transform = transforms.Compose([vtransforms.ToTensor()])
        dset = MovingMNIST(opt.dset_path, opt.is_train, opt.n_frames_input,
                           opt.n_frames_output, opt.num_objects, transform)
    elif opt.dset_name == 'bouncing_balls':
        transform = transforms.Compose([vtransforms.Scale(opt.image_size),
                                        vtransforms.ToTensor()])
        dset = BouncingBalls(opt.dset_path, opt.is_train, opt.n_frames_input,
                             opt.n_frames_output, opt.image_size[0], transform)
    else:
        raise NotImplementedError

    dloader = data.DataLoader(dset, batch_size=opt.batch_size, shuffle=opt.is_train,
                              num_workers=opt.n_workers, pin_memory=True)
    return dloader
Example #5
Source File: main.py From slot-filling with MIT License
def predict(train_data_path, test_data_path, slot_names_path, mode,
            bidirectional, saved_model_path, result_path, cuda):
    train_data = load_data(train_data_path)
    label2idx, idx2label = build_label_vocab(slot_names_path)
    word2idx, idx2word = build_vocab(train_data)
    test_data = load_data(test_data_path)
    test_X, test_y = build_dataset(test_data, word2idx, label2idx)
    test_set = ATISData(test_X, test_y)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=1,
                             shuffle=False)
    vocab_size = len(word2idx)
    label_size = len(label2idx)
    model = SlotFilling(vocab_size, label_size, mode=mode, bidirectional=bidirectional)
    model.load_state_dict(torch.load(saved_model_path))
    if cuda:
        model = model.cuda()
    predicted = do_eval(model, test_loader, cuda)
    predicted_labels = [idx2label[idx] for idx in predicted]
    gen_result_file(test_data, predicted_labels, result_path)
Example #6
Source File: loaddata.py From Visualizing-CNNs-for-monocular-depth-estimation with MIT License
def getTestingData(batch_size=64):
    __imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                        'std': [0.229, 0.224, 0.225]}

    # scale = random.uniform(1, 1.5)
    transformed_testing = depthDataset(csv_file='./data/nyu2_test.csv',
                                       transform=transforms.Compose([
                                           Scale(240),
                                           CenterCrop([304, 228], [152, 114]),
                                           ToTensor(is_test=True),
                                           Normalize(__imagenet_stats['mean'],
                                                     __imagenet_stats['std'])
                                       ]))

    dataloader_testing = DataLoader(transformed_testing, batch_size,
                                    shuffle=False, num_workers=4, pin_memory=False)

    return dataloader_testing
Example #7
Source File: dataloader_multiview_blender.py From DIB-R with MIT License
def get_data_loaders(filelist, imsz, viewnum, mode, bs, numworkers,
                     classes=None, data_folder=None):
    print('Building dataloaders')

    dataset_train = DataProvider(filelist, imsz, viewnum, mode=mode, datadebug=False,
                                 classes=classes, data_folder=data_folder)

    if mode == 'test':
        shuffle = False
    else:
        shuffle = True

    train_loader = DataLoader(dataset_train, batch_size=bs, shuffle=shuffle,
                              num_workers=numworkers, collate_fn=collate_fn)

    print('train num {}'.format(len(dataset_train)))
    print('train iter {}'.format(len(train_loader)))

    return train_loader
Example #8
Source File: data.py From End-to-end-ASR-Pytorch with MIT License
def load_textset(n_jobs, use_gpu, pin_memory, corpus, text):
    # Text tokenizer
    tokenizer = load_text_encoder(**text)
    # Dataset
    tr_set, dv_set, tr_loader_bs, dv_loader_bs, data_msg = create_textset(
        tokenizer, **corpus)
    collect_tr = partial(collect_text_batch, mode='train')
    collect_dv = partial(collect_text_batch, mode='dev')
    # Dataloader (Text data stored in RAM, no need num_workers)
    tr_set = DataLoader(tr_set, batch_size=tr_loader_bs, shuffle=True,
                        drop_last=True, collate_fn=collect_tr,
                        num_workers=0, pin_memory=use_gpu)
    dv_set = DataLoader(dv_set, batch_size=dv_loader_bs, shuffle=False,
                        drop_last=False, collate_fn=collect_dv,
                        num_workers=0, pin_memory=pin_memory)
    # Messages to show
    data_msg.append('I/O spec. | Token type = {}\t| Vocab size = {}'
                    .format(tokenizer.token_type, tokenizer.vocab_size))

    return tr_set, dv_set, tokenizer.vocab_size, tokenizer, data_msg
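Several examples on this page (this one, plus Examples #7, #17, and #19 among others) pass a custom collate_fn so the loader can merge variable-length samples into a batch. As a rough, self-contained sketch of the idea — not the actual collate function from any of these projects — padding variable-length 1-D tensors to a common length looks like this:

import torch
from torch.utils.data import DataLoader

def pad_collate(batch):
    # batch arrives as a list of (sequence, label) pairs of varying length
    seqs, labels = zip(*batch)
    max_len = max(s.size(0) for s in seqs)
    padded = torch.zeros(len(seqs), max_len, dtype=seqs[0].dtype)
    for i, s in enumerate(seqs):
        padded[i, :s.size(0)] = s  # left-align each sequence, zero-pad the rest
    return padded, torch.tensor(labels)

# toy dataset: sequences of length 1..8 with dummy labels
data = [(torch.arange(n), n % 2) for n in range(1, 9)]
loader = DataLoader(data, batch_size=4, collate_fn=pad_collate)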
Example #9
Source File: evaluator.py From neuralcoref with MIT License
def __init__(self, model, dataset, test_data_path, test_key_file, embed_path, args):
    """ Evaluate the pytorch model that is currently being built
        We take the embedding vocabulary currently being trained
    """
    self.test_key_file = test_key_file
    self.cuda = args.cuda
    self.model = model
    batch_sampler = NCBatchSampler(
        dataset.mentions_pair_length, batchsize=args.batchsize, shuffle=False
    )
    self.dataloader = DataLoader(
        dataset,
        collate_fn=padder_collate,
        batch_sampler=batch_sampler,
        num_workers=args.numworkers,
        pin_memory=args.cuda,
    )
    self.mentions_idx, self.n_pairs = batch_sampler.get_batch_info()
    self.load_meta(test_data_path)
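Note the batch_sampler argument here: unlike sampler, which yields one index at a time, a batch_sampler yields whole lists of indices, and it is mutually exclusive with batch_size, shuffle, sampler, and drop_last. A minimal sketch with a hand-written iterable standing in for NCBatchSampler:

from torch.utils.data import DataLoader

data = list(range(12))
# any iterable of index lists works; variable-sized batches are allowed
batches = [[0, 1, 2], [3, 4, 5, 6], [7, 8, 9, 10, 11]]
loader = DataLoader(data, batch_sampler=batches)
for b in loader:
    print(b)  # tensor([0, 1, 2]), then tensor([3, 4, 5, 6]), ...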
Example #10
Source File: base_task.py From Doc2EDAG with MIT License
def prepare_data_loader(self, dataset, batch_size, rand_flag=True):
    # prepare data loader
    if rand_flag:
        data_sampler = RandomSampler(dataset)
    else:
        data_sampler = SequentialSampler(dataset)

    if self.custom_collate_fn is None:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler)
    else:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler,
                                collate_fn=self.custom_collate_fn)

    return dataloader
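This pattern expresses shuffling through the sampler rather than the shuffle flag. The two are mutually exclusive — passing both makes DataLoader raise a ValueError — so a loader built with RandomSampler behaves like one built with shuffle=True. A toy illustration:

from torch.utils.data import DataLoader, RandomSampler

data = list(range(100))

# equivalent randomized loaders
loader_a = DataLoader(data, batch_size=10, shuffle=True)
loader_b = DataLoader(data, batch_size=10, sampler=RandomSampler(data))

# DataLoader(data, batch_size=10, shuffle=True, sampler=RandomSampler(data))
# raises: ValueError: sampler option is mutually exclusive with shuffle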
Example #11
Source File: predictor.py From argus-freesound with MIT License
def tile_prediction(model, image, transforms, tile_size, tile_step, batch_size):
    tiler = ImageSlicer(image.shape,
                        tile_size=tile_size,
                        tile_step=tile_step)
    tiles = tiler.split(image, value=float(image.min()))
    tiles = [transforms(tile) for tile in tiles]

    loader = DataLoader(tiles, batch_size=batch_size)

    preds_lst = []
    for tiles_batch in loader:
        pred_batch = model.predict(tiles_batch)
        preds_lst.append(pred_batch)

    pred = torch.cat(preds_lst, dim=0)
    return pred.cpu().numpy()
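A detail worth noticing in this example: the loader is built from a plain Python list of tiles, not a Dataset subclass. DataLoader accepts any object with __len__ and __getitem__ as a map-style dataset, and the default collate function stacks same-shaped tensors into a single batch tensor. A minimal sketch (shapes are illustrative):

import torch
from torch.utils.data import DataLoader

tiles = [torch.randn(3, 32, 32) for _ in range(10)]  # a plain list stands in for a Dataset
loader = DataLoader(tiles, batch_size=4)
for batch in loader:
    print(batch.shape)  # torch.Size([4, 3, 32, 32]); the last batch holds the 2 leftovers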
Example #12
Source File: datasets.py From sato with Apache License 2.0
def generate_batches_col(dataset, batch_size, shuffle=True,
                         drop_last=True, device="cpu"):
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size,
                            shuffle=shuffle, drop_last=drop_last)
    for data_dict in dataloader:
        if device == "cpu":
            yield data_dict
        else:
            out_data_dict = {}
            for name, tensor in data_dict.items():
                if type(tensor) == dict:
                    out_data_dict[name] = {}
                    for n, t in tensor.items():
                        out_data_dict[name][n] = data_dict[name][n].to(device)
                else:
                    out_data_dict[name] = data_dict[name].to(device)
            yield out_data_dict
Example #13
Source File: 53_machine_translation.py From deep-learning-note with MIT License
def train(encoder, decoder, dataset, lr, batch_size, num_epochs):
    enc_optimizer = torch.optim.Adam(encoder.parameters(), lr=lr)
    dec_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss(reduction='none')
    data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)
    for epoch in range(num_epochs):
        l_sum = 0.0
        start = time.time()
        for X, Y in data_iter:
            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()
            l = batch_loss(encoder, decoder, X, Y, loss)
            l.backward()
            enc_optimizer.step()
            dec_optimizer.step()
            l_sum += l.item()
        if (epoch + 1) % 10 == 0:
            print("epoch %d, loss %.3f, time: %.1f sec" % (
                epoch + 1, l_sum / len(data_iter), time.time() - start))
Example #14
Source File: datasets.py From sato with Apache License 2.0
def generate_batches(dataset, batch_size, shuffle=True,
                     drop_last=True, device="cpu", n_workers=0):
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size,
                            shuffle=shuffle, drop_last=drop_last,
                            num_workers=n_workers, pin_memory=False)
    for data_dict, labels, masks in dataloader:
        if device == "cpu" or device == torch.device('cpu'):
            yield data_dict, labels, masks
        else:
            out_data_dict = {}
            for name, tensor in data_dict.items():
                out_data_dict[name] = data_dict[name].to(device, non_blocking=True)
            yield (out_data_dict,
                   labels.to(device, non_blocking=True),
                   masks.to(device, non_blocking=True))
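The non_blocking=True copies above only pay off when the source batch lives in page-locked (pinned) host memory; with pin_memory=False, as in this loader, the transfer falls back to a synchronous copy. A sketch of the combination that actually overlaps transfer with compute, assuming a CUDA device and a toy dataset:

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
# pin_memory=True makes the loader collate batches into page-locked RAM
loader = DataLoader(dataset, batch_size=32, pin_memory=True)
for X, y in loader:
    # pinned source memory is what allows these copies to overlap with computation
    X = X.to('cuda', non_blocking=True)
    y = y.to('cuda', non_blocking=True)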
Example #15
Source File: eval_hooks.py From mmdetection with Apache License 2.0
def __init__(self, dataloader, interval=1, gpu_collect=False, **eval_kwargs):
    if not isinstance(dataloader, DataLoader):
        raise TypeError('dataloader must be a pytorch DataLoader, but got '
                        f'{type(dataloader)}')
    self.dataloader = dataloader
    self.interval = interval
    self.gpu_collect = gpu_collect
    self.eval_kwargs = eval_kwargs
Example #16
Source File: model.py From pytorch-UNet with MIT License
def predict_dataset(self, dataset, export_path):
    """
    Predicts the images in the given dataset and saves them to disk.

    Args:
        dataset: the dataset of images to be exported, instance of unet.dataset.Image2D
        export_path: path to the folder where the results are to be saved
    """
    self.net.train(False)
    chk_mkdir(export_path)

    for batch_idx, (X_batch, *rest) in enumerate(DataLoader(dataset, batch_size=1)):
        if isinstance(rest[0][0], str):
            image_filename = rest[0][0]
        else:
            image_filename = '%s.png' % str(batch_idx + 1).zfill(3)

        X_batch = Variable(X_batch.to(device=self.device))
        y_out = self.net(X_batch).cpu().data.numpy()

        io.imsave(os.path.join(export_path, image_filename), y_out[0, 1, :, :])
Example #17
Source File: model.py From LipNet-PyTorch with BSD 3-Clause "New" or "Revised" License
def __init__(self, opt):
    self.trainset = GRIDDataset(opt, dset='train')
    self.trainset.load_data()
    self.testset = GRIDDataset(opt, dset='test')
    self.testset.load_data()
    self.trainloader = DataLoader(self.trainset, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers,
                                  collate_fn=ctc_collate, pin_memory=True)
    self.testloader = DataLoader(self.testset, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers,
                                 collate_fn=ctc_collate, pin_memory=True)

    # define network
    self.input_img_size = [3, 50, 100]
    self.chan, self.height, self.width = self.input_img_size
    self.vocab_size = len(self.trainset.vocab)
    assert self.testset.vocab <= self.trainset.vocab, 'possible OOV characters in test set'
    self.maxT = self.trainset.opt.max_timesteps

    self.model = LipNet(opt, self.vocab_size)
    self.opt = opt

    self.optimfunc = torch.optim.Adam(self.model.parameters(), lr=self.opt.lr)
    # learning rate scheduler: fixed LR
Example #18
Source File: eval_hooks.py From mmdetection with Apache License 2.0
def __init__(self, dataloader, interval=1, **eval_kwargs):
    if not isinstance(dataloader, DataLoader):
        raise TypeError('dataloader must be a pytorch DataLoader, but got'
                        f' {type(dataloader)}')
    self.dataloader = dataloader
    self.interval = interval
    self.eval_kwargs = eval_kwargs
Example #19
Source File: data_cnn.py From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License
def get_train_loader(self, batch_size, num_workers):
    if self.aug == 1:
        return DataLoader(self.train_set, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers, collate_fn=self.collate_fn_aug,
                          pin_memory=True)
    else:
        return DataLoader(self.train_set, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers, collate_fn=self.collate_fn,
                          pin_memory=True)
Example #20
Source File: odds.py From Deep-SAD-PyTorch with MIT License
def loaders(self, batch_size: int, shuffle_train=True, shuffle_test=False,
            num_workers: int = 0) -> (DataLoader, DataLoader):
    train_loader = DataLoader(dataset=self.train_set, batch_size=batch_size,
                              shuffle=shuffle_train, num_workers=num_workers,
                              drop_last=True)
    test_loader = DataLoader(dataset=self.test_set, batch_size=batch_size,
                             shuffle=shuffle_test, num_workers=num_workers,
                             drop_last=False)
    return train_loader, test_loader
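The split here follows a recurring convention on this page: drop_last=True for training, so every batch is full-sized, and drop_last=False for testing, so no sample is silently skipped. A toy illustration of the difference:

from torch.utils.data import DataLoader

data = list(range(10))
print(len(list(DataLoader(data, batch_size=4, drop_last=True))))   # 2 batches; samples 8 and 9 are dropped
print(len(list(DataLoader(data, batch_size=4, drop_last=False))))  # 3 batches; the last holds only 2 samples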
Example #21
Source File: isoforest.py From Deep-SAD-PyTorch with MIT License
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
    """Trains the Isolation Forest model on the training data."""
    logger = logging.getLogger()

    # do not drop last batch for non-SGD optimization shallow_ssad
    train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
                              num_workers=n_jobs_dataloader, drop_last=False)

    # Get data from loader
    X = ()
    for data in train_loader:
        inputs, _, _, _ = data
        inputs = inputs.to(device)
        if self.hybrid:
            inputs = self.ae_net.encoder(inputs)  # in hybrid approach, take code representation of AE as features
        X_batch = inputs.view(inputs.size(0), -1)  # X_batch.shape = (batch_size, n_channels * height * width)
        X += (X_batch.cpu().data.numpy(),)
    X = np.concatenate(X)

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    self.model.fit(X)
    train_time = time.time() - start_time
    self.results['train_time'] = train_time
    logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
    logger.info('Finished training.')
Example #22
Source File: bottom_up.py From Dispersion-based-Clustering with MIT License
def get_dataloader(self, dataset, training=False):
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])

    if training:
        transformer = T.Compose([
            T.RandomSizedRectCrop(self.data_height, self.data_width),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalizer,
        ])
        batch_size = self.batch_size
    else:
        transformer = T.Compose([
            T.RectScale(self.data_height, self.data_width),
            T.ToTensor(),
            normalizer,
        ])
        batch_size = self.eval_bs

    data_dir = self.data_dir
    data_loader = DataLoader(
        Preprocessor(dataset, root=data_dir, num_samples=self.frames_per_video,
                     transform=transformer, is_training=training, max_frames=self.max_frames),
        batch_size=batch_size, num_workers=self.data_workers,
        shuffle=training, pin_memory=True, drop_last=training)

    current_status = "Training" if training else "Testing"
    print("Create dataloader for {} with batch_size {}".format(current_status, batch_size))
    return data_loader
Example #23
Source File: dataset84.py From wechat_jump_end_to_end_train with MIT License
def jump_data_loader():
    normalize = transforms.Normalize(mean=[0.92206, 0.92206, 0.92206],
                                     std=[0.08426, 0.08426, 0.08426])
    transform = transforms.Compose([transforms.ToTensor(), normalize])
    dataset = JumpDataset(transform=transform)
    return DataLoader(dataset, batch_size=32, shuffle=True)
Example #24
Source File: data_cnn.py From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License
def get_test_loader(self, batch_size, num_workers):
    return DataLoader(self.test_set, batch_size=batch_size, shuffle=False,
                      num_workers=num_workers, collate_fn=self.collate_fn,
                      pin_memory=True)
Example #25
Source File: data_cnn.py From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License
def get_val_loader(self, batch_size, num_workers):
    return DataLoader(self.val_set, batch_size=batch_size, shuffle=False,
                      num_workers=num_workers, collate_fn=self.collate_fn,
                      pin_memory=True)
Example #26
Source File: base_task.py From Doc2EDAG with MIT License
def prepare_dist_data_loader(self, dataset, batch_size, epoch=0):
    # prepare distributed data loader
    data_sampler = DistributedSampler(dataset)
    data_sampler.set_epoch(epoch)

    if self.custom_collate_fn is None:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler)
    else:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler,
                                collate_fn=self.custom_collate_fn)

    return dataloader
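The set_epoch call is easy to overlook but important: DistributedSampler derives its shuffling seed from the epoch number, so without it every epoch replays the same ordering on every process. A sketch of where it usually sits in the training loop (the loop variables here are illustrative, not from this project):

# assuming `data_sampler` and `dataloader` were built as above
for epoch in range(num_epochs):
    data_sampler.set_epoch(epoch)  # re-seed the shuffle so this epoch gets a fresh ordering
    for batch in dataloader:
        ...  # forward/backward/step as usual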
Example #27
Source File: train.py From pytorch-multigpu with MIT License
def main():
    best_acc = 0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    dataset_train = CIFAR10(root='../data', train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')
    net = pyramidnet()
    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, device)
Example #28
Source File: main.py From pytorch-fm with MIT License
def main(dataset_name, dataset_path, model_name, epoch, learning_rate,
         batch_size, weight_decay, device, save_dir):
    device = torch.device(device)
    dataset = get_dataset(dataset_name, dataset_path)
    train_length = int(len(dataset) * 0.8)
    valid_length = int(len(dataset) * 0.1)
    test_length = len(dataset) - train_length - valid_length
    train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
        dataset, (train_length, valid_length, test_length))
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=8)
    valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, num_workers=8)
    test_data_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=8)
    model = get_model(model_name, dataset).to(device)
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)
    for epoch_i in range(epoch):
        train(model, optimizer, train_data_loader, criterion, device)
        auc = test(model, valid_data_loader, device)
        print('epoch:', epoch_i, 'validation: auc:', auc)
    auc = test(model, test_data_loader, device)
    print('test auc:', auc)
    torch.save(model, f'{save_dir}/{model_name}.pt')
Example #29
Source File: dataset.py From wechat_jump_end_to_end_train with MIT License
def jump_data_loader():
    normalize = transforms.Normalize(mean=[0.92206, 0.92206, 0.92206],
                                     std=[0.08426, 0.08426, 0.08426])
    transform = transforms.Compose([transforms.ToTensor(), normalize])
    dataset = JumpDataset(transform=transform)
    return DataLoader(dataset, batch_size=32, shuffle=True)