Python torch.utils.data.ConcatDataset() Examples
The following are 30 code examples of torch.utils.data.ConcatDataset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.utils.data, or try the search function.
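ConcatDataset chains several map-style datasets behind a single index space: len() is the sum of the part lengths, and indexing dispatches to the correct sub-dataset through a cumulative-size table. Before the project examples, here is a minimal, self-contained sketch (the toy TensorDatasets are illustrative, not taken from any project below):

import torch
from torch.utils.data import ConcatDataset, TensorDataset

# Two small datasets standing in for any map-style datasets.
a = TensorDataset(torch.zeros(3, 2))   # indices 0..2
b = TensorDataset(torch.ones(5, 2))    # indices 3..7 after concatenation

combined = ConcatDataset([a, b])

print(len(combined))              # 8 == len(a) + len(b)
print(combined[0][0])             # comes from `a`
print(combined[3][0])             # first sample of `b`
print(combined.cumulative_sizes)  # [3, 8] -- used internally to route indices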
Example #1
Source File: run_hnn.py From mt-dnn with MIT License | 6 votes

def build_training_data(args, tokenizer, tasks):
    dprd_task = DPRDTask(tokenizer)
    if args.wiki_data:
        wiki_task = WikiWSCRTask(tokenizer)
        train_data = wiki_task.get_train_dataset(args.wiki_data, args.max_seq_length, input_type=tasks)
    else:
        train_data = dprd_task.get_train_dataset(args.data_dir, args.max_seq_length, input_type=tasks)
    if args.dev_train:
        _data = dprd_task.get_dev_dataset(args.data_dir, args.max_seq_length, input_type=tasks)
        _data = [e.data for e in _data if e.name == 'DPRD-test'][0]
        train_data = ConcatDataset([train_data, _data])
    if args.gap_data:
        gap_data = gap_task.get_train_dataset(args.gap_data, args.max_seq_length, input_type=tasks)
        train_data = ConcatDataset([train_data, gap_data])
        if args.dev_train:
            gap_data = [e.data for e in gap_task.get_dev_dataset(args.gap_data, args.max_seq_length, input_type=tasks)]
            train_data = ConcatDataset(gap_data + [train_data])
    return train_data
Example #2
Source File: data.py From torch-kalman with MIT License | 6 votes

def from_dataframe(cls,
                   dataframe: 'DataFrame',
                   group_colname: str,
                   time_colname: str,
                   dt_unit: Optional[str],
                   measure_colnames: Optional[Sequence[str]] = None,
                   X_colnames: Optional[Sequence[str]] = None,
                   y_colnames: Optional[Sequence[str]] = None,
                   **kwargs) -> 'TimeSeriesDataLoader':
    dataset = ConcatDataset(
        datasets=[
            TimeSeriesDataset.from_dataframe(
                dataframe=df,
                group_colname=group_colname,
                time_colname=time_colname,
                measure_colnames=measure_colnames,
                X_colnames=X_colnames,
                y_colnames=y_colnames,
                dt_unit=dt_unit
            )
            for g, df in dataframe.groupby(group_colname)
        ]
    )
    return cls(dataset=dataset, **kwargs)
Example #3
Source File: data.py From IIC with MIT License | 6 votes

def _create_mapping_loader(config, dataset_class, partitions):
    imgs_list = []
    for partition in partitions:
        imgs_curr = dataset_class(
            **{"config": config,
               "split": partition,
               "purpose": "test"}  # return testing tuples, image and label
        )
        if config.use_doersch_datasets:
            imgs_curr = DoerschDataset(config, imgs_curr)
        imgs_list.append(imgs_curr)

    imgs = ConcatDataset(imgs_list)
    dataloader = torch.utils.data.DataLoader(imgs,
                                             batch_size=config.batch_sz,  # full batch
                                             shuffle=False,  # no point since not trained on
                                             num_workers=0,
                                             drop_last=False)
    return dataloader
Example #4
Source File: test_its_journal_2019.py From ehpi_action_recognition with MIT License | 6 votes

def get_test_set_lab(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    datasets = [
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_TEST_VUE01_30FPS"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_TEST_VUE02_30FPS"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)
Example #5
Source File: full_omniglot.py From learn2learn with MIT License | 6 votes

def __init__(self, root, transform=None, target_transform=None, download=False):
    self.root = os.path.expanduser(root)
    self.transform = transform
    self.target_transform = target_transform

    # Set up both the background and eval dataset
    omni_background = Omniglot(self.root, background=True, download=download)
    # Eval labels also start from 0.
    # It's important to add 964 to label values in eval so they don't overwrite background dataset.
    omni_evaluation = Omniglot(self.root,
                               background=False,
                               download=download,
                               target_transform=lambda x: x + len(omni_background._characters))

    self.dataset = ConcatDataset((omni_background, omni_evaluation))
    self._bookkeeping_path = os.path.join(self.root, 'omniglot-bookkeeping.pkl')
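The target_transform offset above is worth generalizing: ConcatDataset does not remap labels, so when merging classification datasets whose class indices both start at 0, the second dataset's labels must be shifted past the first dataset's classes. A minimal sketch of that pattern with hypothetical toy datasets:

import torch
from torch.utils.data import ConcatDataset, Dataset, TensorDataset

class OffsetTargets(Dataset):
    """Wrap a classification dataset and shift its integer labels by a fixed offset."""
    def __init__(self, dataset, offset):
        self.dataset = dataset
        self.offset = offset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        x, y = self.dataset[idx]
        return x, y + self.offset

# Two toy 2-class datasets whose labels both start at 0.
first = TensorDataset(torch.randn(4, 3), torch.tensor([0, 1, 0, 1]))
second = TensorDataset(torch.randn(4, 3), torch.tensor([0, 1, 1, 0]))

# Shift the second dataset's labels past the first one's classes (0..1 -> 2..3).
merged = ConcatDataset([first, OffsetTargets(second, offset=2)])
print(sorted({int(merged[i][1]) for i in range(len(merged))}))  # [0, 1, 2, 3]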
Example #6
Source File: __init__.py From SSD with MIT License | 6 votes

def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True):
    assert len(dataset_list) > 0
    datasets = []
    for dataset_name in dataset_list:
        data = DatasetCatalog.get(dataset_name)
        args = data['args']
        factory = _DATASETS[data['factory']]
        args['transform'] = transform
        args['target_transform'] = target_transform
        if factory == VOCDataset:
            args['keep_difficult'] = not is_train
        elif factory == COCODataset:
            args['remove_empty'] = is_train
        dataset = factory(**args)
        datasets.append(dataset)
    # for testing, return a list of datasets
    if not is_train:
        return datasets
    dataset = datasets[0]
    if len(datasets) > 1:
        dataset = ConcatDataset(datasets)
    return [dataset]
Example #7
Source File: dataset_enum.py From BatchBALD with GNU General Public License v3.0 | 6 votes

def get_targets(dataset):
    """Get the targets of a dataset without any target transforms(!)."""
    if isinstance(dataset, TransformedDataset):
        return get_targets(dataset.dataset)
    if isinstance(dataset, data.Subset):
        targets = get_targets(dataset.dataset)
        return torch.as_tensor(targets)[dataset.indices]
    if isinstance(dataset, data.ConcatDataset):
        return torch.cat([get_targets(sub_dataset) for sub_dataset in dataset.datasets])

    if isinstance(dataset, (datasets.MNIST, datasets.ImageFolder,)):
        return torch.as_tensor(dataset.targets)
    if isinstance(dataset, datasets.SVHN):
        return dataset.labels

    raise NotImplementedError(f"Unknown dataset {dataset}!")
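The ConcatDataset branch above simply recurses into dataset.datasets and concatenates the per-part targets. A toy illustration of the same idea, specialized to TensorDataset for brevity (not BatchBALD code):

import torch
from torch.utils import data

# Toy stand-ins for two labeled datasets.
ds_a = data.TensorDataset(torch.randn(3, 2), torch.tensor([0, 1, 2]))
ds_b = data.TensorDataset(torch.randn(2, 2), torch.tensor([1, 0]))
concat = data.ConcatDataset([ds_a, ds_b])

def get_targets(dataset):
    if isinstance(dataset, data.ConcatDataset):
        return torch.cat([get_targets(d) for d in dataset.datasets])
    if isinstance(dataset, data.Subset):
        return get_targets(dataset.dataset)[dataset.indices]
    return dataset.tensors[1]  # TensorDataset stores (inputs, targets)

print(get_targets(concat))  # tensor([0, 1, 2, 1, 0])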
Example #8
Source File: dataset_enum.py From BatchBALD with GNU General Public License v3.0 | 6 votes

def get_CINIC10(root="./"):
    cinic_directory = root + "data/CINIC-10"
    cinic_mean = [0.47889522, 0.47227842, 0.43047404]
    cinic_std = [0.24205776, 0.23828046, 0.25874835]
    train_transform = transforms.Compose(
        [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()])

    shared_transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean=cinic_mean, std=cinic_std)])

    train_dataset = datasets.ImageFolder(cinic_directory + '/train')
    validation_dataset = datasets.ImageFolder(cinic_directory + '/valid')

    # Concatenate train and validation set to have more samples.
    merged_train_dataset = torch.utils.data.ConcatDataset([train_dataset, validation_dataset])

    test_dataset = datasets.ImageFolder(cinic_directory + '/test')

    return DataSource(
        train_dataset=merged_train_dataset,
        test_dataset=test_dataset,
        shared_transform=shared_transform,
        train_transform=train_transform,
    )
Example #9
Source File: train_its_journal_2019.py From ehpi_action_recognition with MIT License | 6 votes

def get_training_set_gt(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]
    datasets: List[EhpiLSTMDataset] = [
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_GT_30fps"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            ScaleEhpi(image_size),
                            TranslateEhpi(image_size),
                            FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)
Example #10
Source File: marcuhmot.py From tracking_wo_bnw with GNU General Public License v3.0 | 5 votes

def __init__(self, split, dataloader):
    print("[*] Loading Market1501")
    market = Market1501('gt_bbox', **dataloader)
    print("[*] Loading CUHK03")
    cuhk = CUHK03('labeled', **dataloader)
    print("[*] Loading MOT")
    mot = MOTreIDWrapper(split, dataloader)

    self.dataset = ConcatDataset([market, cuhk, mot])
Example #11
Source File: data_silo.py From FARM with Apache License 2.0 | 5 votes

def random_split_ConcatDataset(self, ds, lengths):
    """
    Roughly split a ConcatDataset into non-overlapping new datasets of given lengths.
    Samples inside the ConcatDataset should already be shuffled.

    :param ds: Dataset to be split
    :type ds: Dataset
    :param lengths: lengths of splits to be produced
    :type lengths: list
    """
    if sum(lengths) != len(ds):
        raise ValueError("Sum of input lengths does not equal the length of the input dataset!")

    try:
        idx_dataset = np.where(np.array(ds.cumulative_sizes) > lengths[0])[0][0]
    except IndexError:
        raise Exception("All dataset chunks are being assigned to train set leaving no samples for dev set. "
                        "Either consider increasing dev_split or setting it to 0.0\n"
                        f"Cumulative chunk sizes: {ds.cumulative_sizes}\n"
                        f"train/dev split: {lengths}")

    assert idx_dataset >= 1, "Dev_split ratio is too large, there is no data in train set. " \
                             f"Please lower dev_split = {self.processor.dev_split}"

    train = ConcatDataset(ds.datasets[:idx_dataset])
    test = ConcatDataset(ds.datasets[idx_dataset:])
    return train, test
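FARM's helper relies on ConcatDataset.cumulative_sizes, the running total of sub-dataset lengths that ConcatDataset maintains for index routing. A small sketch of the same boundary search on toy data (the chunk sizes here are illustrative):

import numpy as np
import torch
from torch.utils.data import ConcatDataset, TensorDataset

# Three chunks of sizes 4, 4 and 2; cumulative_sizes becomes [4, 8, 10].
chunks = [TensorDataset(torch.zeros(n, 1)) for n in (4, 4, 2)]
ds = ConcatDataset(chunks)
print(ds.cumulative_sizes)  # [4, 8, 10]

# As in the helper above: find the first chunk boundary beyond the requested
# train length, then split at whole-chunk granularity (hence "roughly").
lengths = [7, 3]
idx_dataset = np.where(np.array(ds.cumulative_sizes) > lengths[0])[0][0]  # -> 1
train = ConcatDataset(ds.datasets[:idx_dataset])
dev = ConcatDataset(ds.datasets[idx_dataset:])
print(len(train), len(dev))  # 4 6 -- the split rounds down to a chunk boundary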
Example #12
Source File: base.py From DSD-SATN with Apache License 2.0 | 5 votes

def _create_data_loader(self, train_flag=True, hard_minging=False):
    print('gathering datasets')
    if self.internet:
        datasets = Internet(train_flag=train_flag, high_resolution=self.high_resolution,
                            spawn=self.receptive_field, video=self.video)
    elif self.test_single:
        datasets = Demo_Loader(train_flag=train_flag, high_resolution=self.high_resolution)
    elif self.eval_pw3d:
        datasets = PW3D(train_flag=train_flag, high_resolution=self.high_resolution,
                        spawn=self.receptive_field, video=self.video,
                        kps_alpha_format=self.alpha_format)
    else:
        datasets_list = []
        if self.with_h36m:
            h36m = hum36m_dataloader(scale_range=[1.0, 1.6], train_flag=train_flag)  # [1.4, 1.6]
            datasets_list = [h36m]
        if self.with_up:
            updataset = UP(train_flag=train_flag, high_resolution=self.high_resolution)
            datasets_list.append(updataset)
        if self.with_mpii:
            mpii = MPIIDataset(train_flag=train_flag, high_resolution=self.high_resolution)
            datasets_list.append(mpii)
        if self.with_aich:
            aich = AICH(train_flag=train_flag, high_resolution=self.high_resolution)
            datasets_list.append(aich)
        if self.with_pa:
            pa = Penn_Action(train_flag=train_flag, high_resolution=self.high_resolution,
                             kps_alpha_format=self.alpha_format, spawn=self.receptive_field,
                             video=self.video, receptive_field=self.receptive_field)
            datasets_list.append(pa)
        datasets = torch.utils.data.ConcatDataset(list(datasets_list))
    print('gathered datasets')
    return DataLoader(dataset=datasets,
                      batch_size=self.batch_size if train_flag else self.val_batch_size,
                      shuffle=True, drop_last=False, pin_memory=True, num_workers=self.nw)
Example #13
Source File: base.py From DSD-SATN with Apache License 2.0 | 5 votes

def _create_adv_data_loader(self, data_adv_set):
    data_set = []
    for data_set_name in data_adv_set:
        data_set_path = config.data_set_path[data_set_name]
        if data_set_name == 'mosh':
            mosh = Mosh(data_set_path=data_set_path)
            data_set.append(mosh)
        else:
            msg = 'invalid adv dataset'
            sys.exit(msg)

    con_adv_dataset = ConcatDataset(data_set)
    return DataLoader(dataset=con_adv_dataset, batch_size=self.batch_size,
                      shuffle=True, drop_last=True, pin_memory=True)
Example #14
Source File: dataset_enum.py From BatchBALD with GNU General Public License v3.0 | 5 votes

def get_RepeatedMNIST():
    # num_classes = 10, input_size = 28
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])
    org_train_dataset = datasets.MNIST("data", train=True, download=True, transform=transform)
    train_dataset = data.ConcatDataset([org_train_dataset] * 3)
    test_dataset = datasets.MNIST("data", train=False, transform=transform)
    return DataSource(train_dataset=train_dataset, test_dataset=test_dataset)
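Note the [org_train_dataset] * 3 idiom: the list holds the same dataset object three times, so ConcatDataset triples the length without copying any data (in contrast to the copy.deepcopy variants in Examples #18 and #19 below). A toy sketch of the behavior:

import torch
from torch.utils.data import ConcatDataset, TensorDataset

base = TensorDataset(torch.arange(5).unsqueeze(1))

# Repeat the same underlying dataset three times; no data is copied --
# all three entries reference the same object.
repeated = ConcatDataset([base] * 3)
print(len(repeated))                   # 15
print(repeated[0][0], repeated[5][0])  # same sample: index 5 wraps back to base[0]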
Example #15
Source File: data.py From IIC with MIT License | 5 votes

def _create_dataloaders(config, dataset_class):
    # unlike in clustering, each dataloader here returns pairs of images - we
    # need the matrix relation between them
    dataloaders = []
    do_shuffle = (config.num_dataloaders == 1)
    for d_i in xrange(config.num_dataloaders):
        print("Creating paired dataloader %d out of %d time %s" %
              (d_i, config.num_dataloaders, datetime.now()))
        sys.stdout.flush()

        train_imgs_list = []
        for train_partition in config.train_partitions:
            train_imgs_curr = dataset_class(
                **{"config": config,
                   "split": train_partition,
                   "purpose": "train"}  # return training tuples, not including labels
            )
            if config.use_doersch_datasets:
                train_imgs_curr = DoerschDataset(config, train_imgs_curr)
            train_imgs_list.append(train_imgs_curr)

        train_imgs = ConcatDataset(train_imgs_list)
        train_dataloader = torch.utils.data.DataLoader(train_imgs,
                                                       batch_size=config.dataloader_batch_sz,
                                                       shuffle=do_shuffle,
                                                       num_workers=0,
                                                       drop_last=False)
        if d_i > 0:
            assert (len(train_dataloader) == len(dataloaders[d_i - 1]))
        dataloaders.append(train_dataloader)

    num_train_batches = len(dataloaders[0])
    print("Length of paired datasets vector %d" % len(dataloaders))
    print("Number of batches per epoch: %d" % num_train_batches)
    sys.stdout.flush()
    return dataloaders
Example #16
Source File: build.py From afm_cvpr2019 with MIT License | 5 votes

def build_train_dataset(config):
    root_list = [osp.abspath(osp.join(osp.dirname(__file__), '..', 'data', f))
                 for f in config.DATASETS.TRAIN]
    IN_RES = [config.INPUT.IN_RES] * 2
    OUT_RES = [config.INPUT.OUT_RES] * 2
    get_dataset = lambda path: AFMTrainDataset(path, img_res=IN_RES, afm_res=OUT_RES)
    dataset = data.ConcatDataset(list(map(get_dataset, root_list)))
    dataset = data.DataLoader(dataset, batch_size=config.SOLVER.BATCH_SIZE, shuffle=True,
                              num_workers=config.DATALOADER.NUM_WORKERS, pin_memory=True)
    return dataset
Example #17
Source File: run_hnn.py From mt-dnn with MIT License | 5 votes

def build_training_data_mt(args, tokenizer):
    if args.group_tasks:
        return build_training_data(args, tokenizer, args.tasks)
    else:
        data = []
        for t in args.tasks:
            data.append(build_training_data(args, tokenizer, [t]))
        return ConcatDataset(data)
Example #18
Source File: data.py From continual-learning with MIT License | 5 votes

def get_dataset(name, type='train', download=True, capacity=None, permutation=None,
                dir='./datasets', verbose=False, target_transform=None):
    '''Create [train|valid|test]-dataset.'''

    data_name = 'mnist' if name == 'mnist28' else name
    dataset_class = AVAILABLE_DATASETS[data_name]

    # specify image-transformations to be applied
    dataset_transform = transforms.Compose([
        *AVAILABLE_TRANSFORMS[name],
        transforms.Lambda(lambda x: _permutate_image_pixels(x, permutation)),
    ])

    # load data-set
    dataset = dataset_class('{dir}/{name}'.format(dir=dir, name=data_name),
                            train=False if type == 'test' else True,
                            download=download, transform=dataset_transform,
                            target_transform=target_transform)

    # print information about dataset on the screen
    if verbose:
        print(" --> {}: '{}'-dataset consisting of {} samples".format(name, type, len(dataset)))

    # if dataset is (possibly) not large enough, create copies until it is.
    if capacity is not None and len(dataset) < capacity:
        dataset = ConcatDataset([copy.deepcopy(dataset)
                                 for _ in range(int(np.ceil(capacity / len(dataset))))])

    return dataset

#----------------------------------------------------------------------------------------------------------#
Example #19
Source File: data.py From pytorch-deep-generative-replay with MIT License | 5 votes

def get_dataset(name, train=True, permutation=None, capacity=None):
    dataset = (TRAIN_DATASETS[name] if train else TEST_DATASETS[name])()
    dataset.transform = transforms.Compose([
        dataset.transform,
        transforms.Lambda(lambda x: _permutate_image_pixels(x, permutation)),
    ])

    if capacity is not None and len(dataset) < capacity:
        return ConcatDataset([
            copy.deepcopy(dataset) for _ in
            range(math.ceil(capacity / len(dataset)))
        ])
    else:
        return dataset
Example #20
Source File: core.py From texture_fields with MIT License | 5 votes

def __init__(self, dataset_folder, fields, split=None, classes=None,
             no_except=True, transform=None):
    # Read metadata file
    metadata_file = os.path.join(dataset_folder, 'metadata.yaml')

    if os.path.exists(metadata_file):
        with open(metadata_file, 'r') as f:
            metadata = yaml.load(f)
    else:
        metadata = {}

    # If classes is None, use all subfolders
    if classes is None:
        classes = os.listdir(dataset_folder)
        classes = [c for c in classes
                   if os.path.isdir(os.path.join(dataset_folder, c))]

    # Get all sub-datasets
    self.datasets_classes = []
    for c in classes:
        subpath = os.path.join(dataset_folder, c)
        if not os.path.isdir(subpath):
            logger.warning('Class %s does not exist in dataset.' % c)
        metadata_c = metadata.get(c, {'id': c, 'name': 'n/a'})
        dataset = Shapes3dClassDataset(subpath, fields, split,
                                       metadata_c, no_except,
                                       transform=transform)
        self.datasets_classes.append(dataset)

    self._concat_dataset = data.ConcatDataset(self.datasets_classes)
Example #21
Source File: train_ehpi.py From ehpi_action_recognition with MIT License | 5 votes

def get_train_set(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]
    datasets: List[EhpiDataset] = [
        # Set 1
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_HSRT_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
        # Set 2
        EhpiDataset(os.path.join(dataset_path, "2019_03_13_Freilichtmuseum_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TRAIN),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)
Example #22
Source File: train_its_journal_2019.py From ehpi_action_recognition with MIT License | 5 votes

def get_training_set_both(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]
    datasets: List[EhpiLSTMDataset] = [
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_POSEALGO_30fps"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            ScaleEhpi(image_size),
                            TranslateEhpi(image_size),
                            FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints),
        EhpiLSTMDataset(os.path.join(dataset_path, "JOURNAL_2019_03_GT_30fps"),
                        transform=transforms.Compose([
                            RemoveJointsOutsideImgEhpi(image_size),
                            ScaleEhpi(image_size),
                            TranslateEhpi(image_size),
                            FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                            NormalizeEhpi(image_size)
                        ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)
Example #23
Source File: train_ehpi_itsc_2019_ofp.py From ehpi_action_recognition with MIT License | 5 votes

def get_sim_gt_only(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]
    datasets: List[EhpiDataset] = [
        EhpiDataset(os.path.join(dataset_path, "ofp_sim_gt_equal_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes,
                                         indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_from_mocap_gt_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes,
                                         indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)
Example #24
Source File: train_ehpi_itsc_2019_ofp.py From ehpi_action_recognition with MIT License | 5 votes

def get_sim_pose_algo_only(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]
    datasets: List[EhpiDataset] = [
        EhpiDataset(os.path.join(dataset_path, "ofp_sim_pose_algo_equal_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes,
                                         indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_from_mocap_pose_algo_30fps"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        RemoveJointsEhpi(indexes_to_remove=foot_indexes,
                                         indexes_to_remove_2=knee_indexes,
                                         probability=0.25),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)
Example #25
Source File: dataset.py From source_separation with Apache License 2.0 | 5 votes

def get_concated_datasets(meta_dir_list: List[str], batch_size: int, num_workers: int,
                          meta_cls_list: List[MetaFrame], fix_len: int = 0,
                          skip_audio: bool = False, sample_rate: int = 44100,
                          audio_mask: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert all([os.path.isdir(x) for x in meta_dir_list]), 'There are invalid directory paths!'
    assert len(meta_dir_list) == len(meta_cls_list), 'meta_dir_list and meta_cls_list must have the same length!'

    # datasets
    train_datasets = []
    valid_datasets = []
    for meta_cls, meta_dir in zip(meta_cls_list, meta_dir_list):
        train_file, valid_file = meta_cls.frame_file_names[1:]

        # load meta file
        train_meta = meta_cls(os.path.join(meta_dir, train_file), sr=sample_rate)
        valid_meta = meta_cls(os.path.join(meta_dir, valid_file), sr=sample_rate)

        # create dataset
        train_dataset = AugmentSpeechDataset(train_meta, fix_len=fix_len,
                                             skip_audio=skip_audio, audio_mask=audio_mask)
        valid_dataset = AugmentSpeechDataset(valid_meta, fix_len=fix_len,
                                             skip_audio=skip_audio, audio_mask=audio_mask)
        train_datasets.append(train_dataset)
        valid_datasets.append(valid_dataset)

    # make concat dataset
    train_conc_dataset = ConcatDataset(train_datasets)
    valid_conc_dataset = ConcatDataset(valid_datasets)

    # create data loader
    train_loader = SpeechDataLoader(train_conc_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)
    valid_loader = SpeechDataLoader(valid_conc_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)

    return train_loader, valid_loader
Example #26
Source File: language_modeling.py From training_results_v0.5 with Apache License 2.0 | 5 votes

def load_dataset(self, split, combine=False):
    """Load a dataset split."""
    loaded_datasets = []

    for k in itertools.count():
        split_k = split + (str(k) if k > 0 else '')
        path = os.path.join(self.args.data, split_k)

        if self.args.raw_text and IndexedRawTextDataset.exists(path):
            ds = IndexedRawTextDataset(path, self.dictionary)
            tokens = [t for l in ds.tokens_list for t in l]
        elif not self.args.raw_text and IndexedInMemoryDataset.exists(path):
            ds = IndexedInMemoryDataset(path, fix_lua_indexing=True)
            tokens = ds.buffer
        else:
            if k > 0:
                break
            else:
                raise FileNotFoundError('Dataset not found: {} ({})'.format(split, self.args.data))

        loaded_datasets.append(
            TokenBlockDataset(
                tokens, ds.sizes, self.args.tokens_per_sample, self.args.sample_break_mode,
                include_targets=True
            ))

        print('| {} {} {} examples'.format(self.args.data, split_k, len(loaded_datasets[-1])))

        if not combine:
            break

    if len(loaded_datasets) == 1:
        dataset = loaded_datasets[0]
        sizes = dataset.sizes
    else:
        dataset = ConcatDataset(loaded_datasets)
        sizes = np.concatenate([ds.sizes for ds in loaded_datasets])

    self.datasets[split] = MonolingualDataset(dataset, sizes, self.dictionary, shuffle=False)
Example #27
Source File: slides.py From torchsupport with MIT License | 5 votes

def MultiSlideData(self, paths, size=(224, 224), level=0, transform=lambda x: x):
    datasets = []
    for path in paths:
        datasets.append(SingleSlideData(path, size=size, level=level, transform=transform))
    return ConcatDataset(datasets)
Example #28
Source File: dataloader.py From OpenLongTailRecognition-OLTR with BSD 3-Clause "New" or "Revised" License | 5 votes

def load_data(data_root, dataset, phase, batch_size, sampler_dic=None, num_workers=4,
              test_open=False, shuffle=True):
    txt = './data/%s/%s_%s.txt' % (dataset, dataset, (phase if phase != 'train_plain' else 'train'))

    print('Loading data from %s' % (txt))

    if phase not in ['train', 'val']:
        transform = data_transforms['test']
    else:
        transform = data_transforms[phase]

    print('Use data transformation:', transform)

    set_ = LT_Dataset(data_root, txt, transform)

    if phase == 'test' and test_open:
        open_txt = './data/%s/%s_open.txt' % (dataset, dataset)
        print('Testing with opensets from %s' % (open_txt))
        open_set_ = LT_Dataset('./data/%s/%s_open' % (dataset, dataset), open_txt, transform)
        set_ = ConcatDataset([set_, open_set_])

    if sampler_dic and phase == 'train':
        print('Using sampler.')
        print('Sample %s samples per-class.' % sampler_dic['num_samples_cls'])
        return DataLoader(dataset=set_, batch_size=batch_size, shuffle=False,
                          sampler=sampler_dic['sampler'](set_, sampler_dic['num_samples_cls']),
                          num_workers=num_workers)
    else:
        print('No sampler.')
        print('Shuffle is %s.' % (shuffle))
        return DataLoader(dataset=set_, batch_size=batch_size,
                          shuffle=shuffle, num_workers=num_workers)
Example #29
Source File: video_datasets.py From RCRNet-Pytorch with MIT License | 4 votes

def get_datasets(name_list, split_list, config_path, root, training, transforms,
                 read_clip=False, random_reverse_clip=False, label_interval=1,
                 frame_between_label_num=0, clip_len=4):
    """
    Return a single data.Dataset, or a data.ConcatDataset when several datasets are requested.
    """
    if not isinstance(name_list, list):
        name_list = [name_list]
    if not isinstance(split_list, list):
        split_list = [split_list]
    if len(name_list) != len(split_list):
        raise ValueError("Dataset numbers must match split numbers")

    # read dataset config
    datasets_config = yaml.load(open(config_path))

    # get datasets
    dataset_list = []
    for name, split in zip(name_list, split_list):
        if name not in datasets_config.keys():
            raise ValueError("Error dataset name {}".format(name))
        dataset_config = datasets_config[name]
        dataset_config['name'] = name
        dataset_config['root'] = root
        dataset_config['split'] = split
        dataset_config['training'] = training
        dataset_config['transforms'] = transforms
        if "video_split" in dataset_config:
            dataset_config['label_interval'] = label_interval
            dataset_config['frame_between_label_num'] = frame_between_label_num
            if read_clip:
                dataset = VideoClipDataset(clip_len=clip_len,
                                           random_reverse_clip=random_reverse_clip,
                                           **dataset_config)
            else:
                dataset = VideoImageDataset(**dataset_config)
        else:
            dataset = ImageDataset(**dataset_config)
        dataset_list.append(dataset)

    if len(dataset_list) == 1:
        return dataset_list[0]
    else:
        return data.ConcatDataset(dataset_list)
Example #30
Source File: train_ehpi_itsc_2019_ofp.py From ehpi_action_recognition with MIT License | 4 votes

def get_set_wo_sim(dataset_path: str, image_size: ImageSize):
    num_joints = 15
    left_indexes: List[int] = [3, 4, 5, 9, 10, 11]
    right_indexes: List[int] = [6, 7, 8, 12, 13, 14]
    datasets: List[EhpiDataset] = [
        EhpiDataset(os.path.join(dataset_path, "ofp_webcam"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints),
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_HSRT_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TEST),
        EhpiDataset(os.path.join(dataset_path, "ofp_record_2019_03_11_HELLA_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TRAIN),
        # Freilichtmuseum
        EhpiDataset(os.path.join(dataset_path, "2019_03_13_Freilichtmuseum_30FPS"),
                    transform=transforms.Compose([
                        RemoveJointsOutsideImgEhpi(image_size),
                        ScaleEhpi(image_size),
                        TranslateEhpi(image_size),
                        FlipEhpi(left_indexes=left_indexes, right_indexes=right_indexes),
                        NormalizeEhpi(image_size)
                    ]), num_joints=num_joints, dataset_part=DatasetPart.TRAIN),
    ]
    for dataset in datasets:
        dataset.print_label_statistics()
    return ConcatDataset(datasets)