Python random.shuffle() Examples
The following are 30 code examples of random.shuffle().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module random, or try the search function.
Example #1
Source File: DataLoader_NER.py From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0 | 6 votes |
def __init__(self, path, shuffle, config):
    """
    Store the loader settings and start with an empty data list.

    :param path: data path list
    :param shuffle: shuffle bool
    :param config: config object; ``max_count`` and ``max_char_len`` are read from it
    """
    # inputs handed in by the caller
    self.path = path
    self.shuffle = shuffle
    # limits copied from the config
    self.max_count = config.max_count
    self.max_char_len = config.max_char_len
    # char feature: a pair of padding symbols
    self.pad_char = [char_pad, char_pad]
    # nothing has been loaded yet
    self.data_list = []
Example #2
Source File: estimator_utils.py From EDeN with MIT License | 6 votes |
def make_train_test_sets(pos_graphs, neg_graphs,
                         test_proportion=.3, random_state=2):
    """Split positive and negative graphs into shuffled train and test sets.

    Each class is shuffled (after seeding ``random`` with ``random_state``)
    and its last ``int(len(class) * test_proportion)`` items become test
    instances. Positives get target 1, negatives get target 0, and each of
    the two resulting sets is passed through ``paired_shuffle``.

    Parameters
    ----------
    pos_graphs : list
        Positive instances. The list itself is not modified.
    neg_graphs : list
        Negative instances. The list itself is not modified.
    test_proportion : float
        Fraction of each class assigned to the test set.
    random_state : int
        Seed passed to ``random.seed``.

    Returns
    -------
    tuple
        ``((train_graphs, train_targets), (test_graphs, test_targets))``
        where the targets are numpy arrays.
    """
    random.seed(random_state)
    # Shuffle copies so the caller's lists keep their original order; the
    # permutation produced for a given seed is the same as before.
    pos_graphs = list(pos_graphs)
    neg_graphs = list(neg_graphs)
    random.shuffle(pos_graphs)
    random.shuffle(neg_graphs)

    def _split(items):
        # Split on an explicit index. Slicing with ``[:-n]`` / ``[-n:]``
        # is wrong for n == 0: it yields an empty train set and puts every
        # item in the test set.
        n_test = min(len(items), max(0, int(len(items) * test_proportion)))
        cut = len(items) - n_test
        return items[:cut], items[cut:]

    tr_pos_graphs, te_pos_graphs = _split(pos_graphs)
    tr_neg_graphs, te_neg_graphs = _split(neg_graphs)
    tr_graphs = tr_pos_graphs + tr_neg_graphs
    te_graphs = te_pos_graphs + te_neg_graphs
    tr_targets = [1] * len(tr_pos_graphs) + [0] * len(tr_neg_graphs)
    te_targets = [1] * len(te_pos_graphs) + [0] * len(te_neg_graphs)
    tr_graphs, tr_targets = paired_shuffle(tr_graphs, tr_targets)
    te_graphs, te_targets = paired_shuffle(te_graphs, te_targets)
    return (tr_graphs, np.array(tr_targets)), (te_graphs, np.array(te_targets))
Example #3
Source File: utils.py From deep-learning-note with MIT License | 6 votes |
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    """Yield random minibatches ``(X, Y)`` cut from ``corpus_indices``.

    The corpus is divided into consecutive windows of ``num_steps`` items,
    the windows are visited in random order, and ``batch_size`` of them form
    one minibatch. ``Y`` holds the same windows as ``X`` shifted one position
    to the right. Both are float32 tensors on ``device`` (CUDA when available
    and no device is given, otherwise CPU).
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Subtract 1 because every label window starts one position after the
    # matching input window.
    window_count = (len(corpus_indices) - 1) // num_steps
    batch_count = window_count // batch_size
    order = list(range(window_count))
    random.shuffle(order)

    for first in range(0, batch_count * batch_size, batch_size):
        # Read batch_size random windows for this minibatch.
        inputs, labels = [], []
        for window in order[first: first + batch_size]:
            begin = window * num_steps
            # A window is the run of num_steps items starting at ``begin``.
            inputs.append(corpus_indices[begin: begin + num_steps])
            labels.append(corpus_indices[begin + 1: begin + 1 + num_steps])
        yield (torch.tensor(inputs, dtype=torch.float32, device=device),
               torch.tensor(labels, dtype=torch.float32, device=device))
Example #4
Source File: detection.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def __init__(self, batch_size, data_shape,
             path_imgrec=None, path_imglist=None, path_root=None, path_imgidx=None,
             shuffle=False, part_index=0, num_parts=1, aug_list=None, imglist=None,
             data_name='data', label_name='label', **kwargs):
    """Initialise the base iterator, then attach augmenters and label shape.

    The base class is always initialised with an empty augmenter list. The
    augmenters stored on this object are ``aug_list`` when it is given, or
    the result of ``CreateDetAugmenter(data_shape, **kwargs)`` otherwise.
    """
    base_arguments = dict(
        batch_size=batch_size, data_shape=data_shape,
        path_imgrec=path_imgrec, path_imglist=path_imglist,
        path_root=path_root, path_imgidx=path_imgidx,
        shuffle=shuffle, part_index=part_index, num_parts=num_parts,
        aug_list=[], imglist=imglist,
        data_name=data_name, label_name=label_name,
    )
    super(ImageDetIter, self).__init__(**base_arguments)

    self.auglist = (CreateDetAugmenter(data_shape, **kwargs)
                    if aug_list is None else aug_list)

    # Per the original note, the estimate goes through all labels to get
    # the proper label shape.
    label_shape = self._estimate_label_shape()
    label_dims = (self.batch_size, label_shape[0], label_shape[1])
    self.provide_label = [(label_name, label_dims)]
    self.label_shape = label_shape
Example #5
Source File: test_recordio.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def test_indexed_recordio():
    """Write N indexed records, then read them back in random key order.

    The index and record files live in a private temporary directory that
    is removed when the test finishes, whether it passes or fails.
    """
    # Needed only by this test, so imported locally.
    import os
    import shutil

    def payload(i):
        # write_idx gets a str on Python 2 and UTF-8 bytes on Python 3.
        if sys.version_info[0] < 3:
            return str(chr(i))
        return bytes(str(chr(i)), 'utf-8')

    # tempfile.mktemp() is deprecated (another process can claim the name
    # before it is used) and the files it named were never deleted.
    tmp_dir = tempfile.mkdtemp()
    reader = None
    try:
        fidx = os.path.join(tmp_dir, 'records.idx')
        frec = os.path.join(tmp_dir, 'records.rec')
        N = 255

        writer = mx.recordio.MXIndexedRecordIO(fidx, frec, 'w')
        for i in range(N):
            writer.write_idx(i, payload(i))
        writer.close()

        reader = mx.recordio.MXIndexedRecordIO(fidx, frec, 'r')
        keys = reader.keys
        assert sorted(keys) == list(range(N))
        random.shuffle(keys)
        for i in keys:
            assert reader.read_idx(i) == payload(i)
    finally:
        # Close before deleting so the files are not held open.
        if reader is not None:
            reader.close()
        shutil.rmtree(tmp_dir, ignore_errors=True)
Example #6
Source File: BasePythonDataLayer.py From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License | 6 votes |
def preload_db(self):
    """Read all images and all labels into memory.

    The implementation relies on the DataManager classes: one is created
    according to ``self._source_type`` ('BCF', 'CSV' or 'LMDB') and its
    ``load_all()`` result is stored in ``self._data`` / ``self._label``.
    ``self._sample_count`` is set to the number of data items, and
    ``self.shuffle()`` is called when ``self._shuffle`` is true.

    Raises:
        ValueError: if ``self._source_type`` is not a supported type.
    """
    print("Preloading Data...")
    if self._source_type == 'BCF':
        self._data_manager = BCFDataManager(self._layer_params)
    elif self._source_type == 'CSV':
        self._data_manager = CSVDataManager(self._layer_params)
    elif self._source_type == 'LMDB':
        self._data_manager = LMDBDataManager(self._layer_params)
    else:
        # An unknown type used to fall through without creating a data
        # manager, so the failure surfaced later and pointed elsewhere.
        raise ValueError(
            "Unsupported source_type %r; expected 'BCF', 'CSV' or 'LMDB'"
            % (self._source_type,))
    # read all data
    self._data, self._label = self._data_manager.load_all()
    self._sample_count = len(self._data)
    if self._shuffle:
        self.shuffle()
Example #7
Source File: BasePythonDataLayer.py From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License | 6 votes |
def setup(self, bottom, top):
    """Parse the layer parameters, then load the mean and all data.

    ``self.param_str`` is parsed as YAML into ``self._layer_params``. The
    keys read here and their defaults are: ``batch_size`` (256),
    ``resize`` (-1), ``mean_file`` (None), ``source_type`` ('CSV'),
    ``shuffle`` (False) and ``compressed`` (True). ``bottom`` and ``top``
    are not used by this method.
    """
    # safe_load instead of yaml.load: calling yaml.load() without a Loader
    # can construct arbitrary Python objects and is rejected by PyYAML >= 6.
    # An empty parameter string parses to None, so fall back to {} and let
    # the defaults below apply.
    layer_params = yaml.safe_load(self.param_str) or {}
    self._layer_params = layer_params
    # default batch_size = 256
    self._batch_size = int(layer_params.get('batch_size', 256))
    self._resize = layer_params.get('resize', -1)
    self._mean_file = layer_params.get('mean_file', None)
    self._source_type = layer_params.get('source_type', 'CSV')
    self._shuffle = layer_params.get('shuffle', False)
    # read image_mean from file and preload all data into memory
    # will read either file or array into self._mean
    self.set_mean()
    self.preload_db()
    self._compressed = self._layer_params.get('compressed', True)
    if not self._compressed:
        self.decompress_data()
Example #8
Source File: 30_series_sampling.py From deep-learning-note with MIT License | 6 votes |
def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    """Generate minibatches of randomly ordered, non-overlapping windows.

    Yields ``(X, Y)`` pairs of float32 tensors. Each row of ``X`` is a run
    of ``num_steps`` consecutive items of ``corpus_indices`` and the matching
    row of ``Y`` is the same run moved forward by one item. When ``device``
    is None, CUDA is used if available, otherwise the CPU.
    """
    # Subtract 1 because the label sequence is the input sequence advanced
    # by one index.
    num_examples = (len(corpus_indices) - 1) // num_steps
    shuffled_examples = list(range(num_examples))
    random.shuffle(shuffled_examples)

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _window(pos):
        # Return the sequence of length num_steps starting at pos.
        return corpus_indices[pos: pos + num_steps]

    def _as_tensor(rows):
        return torch.tensor(rows, dtype=torch.float32, device=device)

    for step in range(num_examples // batch_size):
        # Read batch_size random examples each time.
        lo = step * batch_size
        offsets = [j * num_steps for j in shuffled_examples[lo: lo + batch_size]]
        X = [_window(pos) for pos in offsets]
        Y = [_window(pos + 1) for pos in offsets]
        yield _as_tensor(X), _as_tensor(Y)
Example #9
Source File: utils.py From deep-learning-note with MIT License | 6 votes |
def load_data_fashion_mnist(batch_size, resize=None, root='./data'):
    """Download the Fashion-MNIST dataset and return (train, test) loaders.

    Images are optionally resized to ``resize`` and then converted to
    tensors. The training loader shuffles; the test loader does not.
    """
    transforms = torchvision.transforms
    steps = [transforms.Resize(size=resize)] if resize else []
    steps.append(transforms.ToTensor())
    transform = transforms.Compose(steps)

    def _split(train):
        return torchvision.datasets.FashionMNIST(
            root=root, train=train, download=True, transform=transform)

    mnist_train = _split(True)
    mnist_test = _split(False)

    # 0 means no extra worker processes are used to speed up data loading.
    num_workers = 0 if sys.platform.startswith('win') else 4

    def _loader(dataset, shuffle):
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle,
            num_workers=num_workers)

    return _loader(mnist_train, True), _loader(mnist_test, False)
Example #10
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_caltech101_iterator(batch_size, num_workers, dtype):
    """Return (train, test) ``DataLoaderIter`` objects for Caltech-101.

    Every image is resized so its shorter edge is 224, centre-cropped to
    224x224 and transposed to channel-first layout. The training loader
    shuffles; the test loader does not.

    ``dtype`` is forwarded to ``DataLoaderIter``. It used to be accepted
    and then dropped, so the requested dtype never reached the iterators.
    NOTE(review): this assumes ``DataLoaderIter`` takes the dtype as its
    second positional argument, as the call in ``get_imagenet_iterator``
    does - confirm against its definition.
    """
    def transform(image, label):
        # resize the shorter edge to 224, the longer edge will be greater or equal to 224
        resized = mx.image.resize_short(image, 224)
        # center and crop an area of size (224,224); the crop info is not needed
        cropped, _ = mx.image.center_crop(resized, (224, 224))
        # transpose the channels to be (3,224,224)
        transposed = mx.nd.transpose(cropped, (2, 0, 1))
        return transposed, label

    training_path, testing_path = get_caltech101_data()
    dataset_train = ImageFolderDataset(root=training_path, transform=transform)
    dataset_test = ImageFolderDataset(root=testing_path, transform=transform)

    train_data = DataLoader(dataset_train, batch_size, shuffle=True, num_workers=num_workers)
    test_data = DataLoader(dataset_test, batch_size, shuffle=False, num_workers=num_workers)
    return DataLoaderIter(train_data, dtype), DataLoaderIter(test_data, dtype)
Example #11
Source File: atomic.py From comet-commonsense with Apache License 2.0 | 6 votes |
def shuffle_sequences(self, split="train", keys=None):
    """Reorder the data, masks and sequences of ``split`` in unison.

    One random permutation is drawn per key and applied to
    ``self.sequences[split][key]`` (along dimension 0),
    ``self.data[split][key]`` and ``self.masks[split][key]``. When ``keys``
    is None, every key of ``self.data[split]`` is processed.
    """
    split_data = self.data[split]
    if keys is None:
        keys = split_data.keys()

    for key in keys:
        permutation = list(range(len(split_data[key])))
        random.shuffle(permutation)

        tensor_order = torch.LongTensor(permutation)
        self.sequences[split][key] = self.sequences[split][key].index_select(0, tensor_order)

        split_data[key] = [split_data[key][i] for i in permutation]

        split_masks = self.masks[split]
        split_masks[key] = [split_masks[key][i] for i in permutation]
Example #12
Source File: conceptnet.py From comet-commonsense with Apache License 2.0 | 6 votes |
def shuffle_sequences(self, split="train", keys=None):
    """Shuffle each key of ``split`` with one shared random order.

    The keys "positive" and "negative" are skipped. For every other key a
    random permutation is drawn and applied to the sequence tensor (along
    dimension 0), the data list and the mask list. When ``keys`` is None,
    all keys of ``self.data[split]`` are considered.
    """
    if keys is None:
        keys = self.data[split].keys()

    skipped = ("positive", "negative")
    for key in keys:
        if key in skipped:
            continue

        entries = self.data[split][key]
        new_order = list(range(len(entries)))
        random.shuffle(new_order)

        sequences = self.sequences[split]
        sequences[key] = sequences[key].index_select(0, torch.LongTensor(new_order))

        self.data[split][key] = [entries[i] for i in new_order]

        old_masks = self.masks[split][key]
        self.masks[split][key] = [old_masks[i] for i in new_order]
Example #13
Source File: DataLoader_NER.py From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0 | 6 votes |
def dataLoader(self):
    """
    Load the instances of every path in ``self.path``, shuffle them, write
    them back out and append them to ``self.data_list``.

    :return: the first three entries of ``self.data_list`` when it holds
        exactly three, the first two when it holds exactly two, else None
    """
    path = self.path
    shuffle = self.shuffle
    assert isinstance(path, list), "Path Must Be In List"
    print("Data Path {}".format(path))

    for data_path in path:
        print("Loading Data Form {}".format(data_path))
        insts = self._Load_Each_Data(path=data_path, shuffle=shuffle)
        random.shuffle(insts)
        self._write_shuffle_inst_to_file(insts, path=data_path)
        self.data_list.append(insts)

    # return train/dev/test data
    if len(self.data_list) in (2, 3):
        return tuple(self.data_list)
    return None
Example #14
Source File: data.py From VSE-C with MIT License | 6 votes |
def get_loader_single(data_name, split, root, json, vocab, transform,
                      batch_size=100, shuffle=True,
                      num_workers=2, ids=None, collate_fn=collate_fn):
    """Returns torch.utils.data.DataLoader for custom coco dataset.

    A ``CocoDataset`` is built when ``data_name`` contains 'coco', and a
    ``FlickrDataset`` when it contains 'f8k' or 'f30k'. The dataset is
    wrapped in a ``DataLoader`` with pinned memory.

    Raises:
        ValueError: if ``data_name`` matches none of the supported datasets.
    """
    if 'coco' in data_name:
        # COCO custom dataset
        dataset = CocoDataset(root=root,
                              json=json,
                              vocab=vocab,
                              transform=transform, ids=ids)
    elif 'f8k' in data_name or 'f30k' in data_name:
        dataset = FlickrDataset(root=root,
                                split=split,
                                json=json,
                                vocab=vocab,
                                transform=transform)
    else:
        # An unrecognised name used to leave ``dataset`` unbound and fail
        # below with an UnboundLocalError that hid the real cause.
        raise ValueError(
            "Unsupported data_name %r; expected a name containing "
            "'coco', 'f8k' or 'f30k'" % (data_name,))

    # Data loader
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader
Example #15
Source File: chainer_alex.py From mlimages with MIT License | 6 votes |
def show(limit, shuffle=True):
    """Print the path of, and display, up to ``limit`` training images.

    A non-positive ``limit`` is replaced by 5. With ``shuffle`` set, all
    generated samples are collected into a list and shuffled first.
    """
    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT,
                      mean_image_file=MEAN_IMAGE_FILE, image_property=IMAGE_PROP)
    max_shown = limit if limit > 0 else 5

    samples = td.generate()
    if shuffle:
        import random
        samples = list(samples)
        random.shuffle(samples)

    for shown, (arr, im) in enumerate(samples, 1):
        restored = td.data_to_image(arr, im.label, raw=True)
        print(im.path)
        restored.image.show()
        if shown >= max_shown:
            break
Example #16
Source File: concat_db.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _load_image_set_index(self, shuffle):
    """
    count the images of all databases and build the index list

    Parameters
    ----------
    shuffle : bool
        whether to shuffle the initial indices

    Returns
    -------
    list of int : the indices 0 .. num_images - 1, shuffled on request
    """
    # total over every database in self.imdbs
    self.num_images = sum(db.num_images for db in self.imdbs)
    indices = list(range(self.num_images))
    if shuffle:
        random.shuffle(indices)
    return indices
Example #17
Source File: iterators.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def reset(self):
    """Resets the iterator to the beginning of the data."""
    self.curr_idx = 0
    random.shuffle(self.idx)

    bucket_ids = range(len(self.sentences))

    # shuffle data in each bucket, keeping the four lists aligned
    for b in bucket_ids:
        shuffled = shuffle(self.indices[b], self.sentences[b],
                           self.characters[b], self.label[b])
        (self.indices[b], self.sentences[b],
         self.characters[b], self.label[b]) = shuffled

    def _per_bucket_arrays(buckets):
        # one array per bucket, in bucket order
        return [ndarray.array(buckets[b], dtype=self.dtype) for b in bucket_ids]

    self.ndindex = _per_bucket_arrays(self.indices)
    self.ndsent = _per_bucket_arrays(self.sentences)
    self.ndchar = _per_bucket_arrays(self.characters)
    self.ndlabel = _per_bucket_arrays(self.label)
Example #18
Source File: turing.py From gated-graph-transformer-network with MIT License | 6 votes |
def encode_turing_machine_rules(rules, starting_state=None, story=None):
    """Write a Turing machine's transition rules and start state into a story.

    ``rules[cstate][read]`` must be a ``(write, nstate, direc)`` triple. Each
    triple becomes an edge named ``rule_<read>_<write>_<direc>`` from the
    node of ``cstate`` to the node of ``nstate`` plus one "rule ..." story
    line; the rules are emitted in random order. A final "start ..." line
    records the state the head is placed on.

    Args:
        rules: per-state sequences of ``(write, nstate, direc)`` triples.
        starting_state: index of the initial state; chosen uniformly at
            random from the states in ``rules`` when None.
        story: story to extend; a new ``graph_tools.Story`` when None.

    Returns:
        The story that was passed in or created.
    """
    if story is None:
        story = graph_tools.Story()
    graph = story.graph

    if starting_state is None:
        # random.choice() needs a sequence. Passing the bare integer
        # len(rules) raised TypeError, so pick from the state indices.
        starting_state = random.choice(range(len(rules)))

    the_edges = [(cstate, read, write, nstate, direc)
                 for (cstate, stuff) in enumerate(rules)
                 for (read, (write, nstate, direc)) in enumerate(stuff)]
    random.shuffle(the_edges)

    for cstate, read, write, nstate, direc in the_edges:
        source = graph.make_unique('state_{}'.format(cstate))
        dest = graph.make_unique('state_{}'.format(nstate))
        edge_type = "rule_{}_{}_{}".format(read, write, direc)
        source[edge_type] = dest
        story.add_line("rule {} {} {} {} {}".format(source.type, read, write, dest.type, direc))

    head = graph.make_unique('head')
    head.state = graph.make_unique('state_{}'.format(starting_state))
    story.add_line("start {}".format(head.state.type))
    return story
Example #19
Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0 | 6 votes |
def move(self, coordinates):
    """
    Move coordinates.

    Every generator of the combination is applied in turn, each one
    receiving the coordinates returned by the previous one. The order is
    the list order, or a fresh random order when the shuffle flag is set.

    :Parameters:
        #. coordinates (np.ndarray): The coordinates on which to apply
           the transformation.

    :Returns:
        #. coordinates (np.ndarray): The new coordinates after applying
           the transformation.
    """
    # Build a real list: on Python 3 range() returns an immutable object
    # that cannot be shuffled in place.
    indexes = list(range(len(self.__combination)))
    if self.__shuffle:
        shuffle( indexes )
    # create the move combination
    for idx in indexes:
        coordinates = self.__combination[idx].move(coordinates)
    return coordinates
Example #20
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_imagenet_iterator(root, batch_size, num_workers, data_shape=224, dtype='float32'):
    """Dataset loader with preprocessing.

    Builds a shuffling training loader over ``<root>/train`` (the last
    incomplete batch is discarded) and a validation loader over
    ``<root>/val`` (the last incomplete batch is kept), and returns both
    wrapped in ``DataLoaderIter`` with the given ``dtype``.

    Raises:
        ValueError: if ``<root>/val/n01440764`` is not a directory.
    """
    def _announce(folder):
        logging.info("Loading image folder %s, this may take a bit long...", folder)

    train_dir = os.path.join(root, 'train')
    train_transform, val_transform = get_imagenet_transforms(data_shape, dtype)
    _announce(train_dir)
    train_data = DataLoader(
        ImageFolderDataset(train_dir, transform=train_transform),
        batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)

    val_dir = os.path.join(root, 'val')
    # One known category folder is probed to check the one-subdir-per-category layout.
    probe_dir = os.path.expanduser(os.path.join(val_dir, 'n01440764'))
    if not os.path.isdir(probe_dir):
        raise ValueError('Make sure validation images are stored in one subdir per category, a helper script is available at https://git.io/vNQv1')
    _announce(val_dir)
    val_data = DataLoader(
        ImageFolderDataset(val_dir, transform=val_transform),
        batch_size, last_batch='keep', num_workers=num_workers)

    return DataLoaderIter(train_data, dtype), DataLoaderIter(val_data, dtype)
Example #21
Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0 | 5 votes |
def _codify__(self, name='generator', group=None, addDependencies=True):
    """
    Generate python source that rebuilds this combinator.

    Each generator of the combination is codified under the variable name
    ``<name>_<index>``, and a final line assigns a ``MoveGeneratorCombinator``
    built from those variables to ``name``.

    :Parameters:
        #. name (string): Variable name used in the generated code. It must
           match ``[a-zA-Z_][a-zA-Z0-9_]*``.
        #. group (object): Value formatted into the ``group=`` argument of
           the generated constructor call.
        #. addDependencies (boolean): Whether to put the dependency lines,
           followed by an empty line, in front of the generated code.

    :Returns:
        #. dependencies (list): The collected dependency lines.
        #. code (string): The generated code joined with newlines.
    """
    # LOGGER.error(...) is the assertion message, so it only runs when a check fails
    assert isinstance(name, basestring), LOGGER.error("name must be a string")
    assert re.match('[a-zA-Z_][a-zA-Z0-9_]*$', name) is not None, LOGGER.error("given name '%s' can't be used as a variable name"%name)
    # ordered dict used as an ordered set of dependency lines
    dependencies = collections.OrderedDict()
    dependencies['from fullrmc.Core import MoveGenerator'] = True
    code = []
    combination = []
    # codify generators, collecting their code, variable names and dependencies
    for idx, gen in enumerate(self.__combination):
        nm = '%s_%i'%(name,idx)
        dep, cd = gen._codify__(group=None, name=nm, addDependencies=True)
        code.append(cd)
        combination.append(nm)
        for d in dep:
            _ = dependencies.setdefault(d,True)
    # codify combinator: one assignment that references the generator variables
    code.append("{name} = MoveGenerator.MoveGeneratorCombinator\
(group={group}, combination=[{combination}], shuffle={shuffle})"
.format(name=name, group=group, combination=', '.join(combination), shuffle=self.shuffle))
    # set dependencies: keep only the keys, in insertion order
    dependencies = list(dependencies)
    # add dependencies in front of the code, separated by an empty line
    if addDependencies:
        code = dependencies + [''] + code
    # return
    return dependencies, '\n'.join(code)
Example #22
Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0 | 5 votes |
def shuffle(self):
    """Shuffle flag: returns the value stored in the private ``__shuffle`` attribute."""
    return self.__shuffle
Example #23
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def iterate_forever(self, batch_size, num_steps):
    """Yield batches endlessly, re-listing the input files on every pass.

    The files matching ``self._file_pattern`` are listed again at the start
    of each pass and shuffled when ``self._shuffle`` is true. The file names
    are fed through ``self._sentence_stream`` and ``self._iterate``, and the
    values produced by ``self._iterate`` are yielded unchanged.

    Raises:
        ValueError: when no file matches ``self._file_pattern``.
    """
    def file_stream():
        while True:
            file_patterns = glob.glob(self._file_pattern)
            if not file_patterns:
                # With nothing to yield this loop would spin forever.
                raise ValueError(
                    "No files match pattern %r" % (self._file_pattern,))
            # The condition used to be inverted: files were shuffled only
            # when shuffling was switched off.
            if self._shuffle:
                random.shuffle(file_patterns)
            for file_name in file_patterns:
                yield file_name

    for value in self._iterate(self._sentence_stream(file_stream()), batch_size, num_steps):
        yield value
Example #24
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def iterate_once(self, batch_size, num_steps):
    """Yield batches from a single pass over the input files.

    The files matching ``self._file_pattern`` are listed once and shuffled
    when ``self._shuffle`` is true. The file names are fed through
    ``self._sentence_stream`` and ``self._iterate``, and the values produced
    by ``self._iterate`` are yielded unchanged.
    """
    def file_stream():
        file_patterns = glob.glob(self._file_pattern)
        # The condition used to be inverted: files were shuffled only when
        # shuffling was switched off.
        if self._shuffle:
            random.shuffle(file_patterns)
        for file_name in file_patterns:
            yield file_name

    for value in self._iterate(self._sentence_stream(file_stream()), batch_size, num_steps):
        yield value
Example #25
Source File: imdb.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def save_imglist(self, fname=None, root=None, shuffle=False):
    """
    save imglist to disk

    One tab-separated line is written per image that has a non-empty label:
    the index, the constant 2, the label width, the flattened label values
    with four decimals, and the image path.

    Parameters:
    ----------
    fname : str
        saved filename, ``self.name + '.lst'`` when not given
    root : str
        when given, image paths are written relative to this directory
    shuffle : bool
        whether to shuffle the lines before writing

    Raises:
    ----------
    RuntimeError : when no image produced a line
    """
    def progress_bar(count, total, suffix=''):
        import sys
        bar_len = 24
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
        bar = ('=' * filled_len).ljust(bar_len, '-')
        sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
        sys.stdout.flush()

    lines = []
    for index in range(self.num_images):
        progress_bar(index, self.num_images)
        label = self.label_from_index(index)
        if label.size < 1:
            # images without any label are left out of the list
            continue
        path = self.image_path_from_index(index)
        if root:
            path = osp.relpath(path, root)
        fields = [str(index), str(2), str(label.shape[1])]
        fields.extend("{0:.4f}".format(x) for x in label.ravel())
        fields.append(path)
        lines.append('\t'.join(fields) + '\n')

    if not lines:
        raise RuntimeError("No image in imdb")

    if shuffle:
        import random
        random.shuffle(lines)
    if not fname:
        fname = self.name + '.lst'
    with open(fname, 'w') as f:
        f.writelines(lines)
Example #26
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def _parse_file(self, file_name):
    """Yield the parsed form of every line of a UTF-8 text file.

    All lines are read and stripped first, shuffled when ``self._shuffle``
    is true, and then passed one by one to ``self._parse_sentence``.
    """
    logging.debug("Processing file: %s", file_name)
    with codecs.open(file_name, "r", "utf-8") as f:
        lines = [line.strip() for line in f]
    # The condition used to be inverted: lines were shuffled only when
    # shuffling was switched off.
    if self._shuffle:
        random.shuffle(lines)
    logging.debug("Finished processing!")
    for line in lines:
        yield self._parse_sentence(line)
Example #27
Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def __init__(self, vocab, file_pattern, shuffle=False):
    """Remember the vocabulary, the input file pattern and the shuffle flag."""
    self._shuffle = shuffle
    self._file_pattern = file_pattern
    self._vocab = vocab
Example #28
Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, group=None, combination=None, shuffle=False):
    """
    Initialize the combinator.

    :Parameters:
        #. group (object): Passed to the parent class initializer.
        #. combination (object): Passed to ``set_combination``.
        #. shuffle (boolean): Passed to ``set_shuffle``.
    """
    # start from an empty combination, set before the parent initializer runs
    self.__combination = []
    # initialize the parent class
    super(MoveGeneratorCombinator, self).__init__(group=group)
    # store the combination through its setter
    self.set_combination(combination=combination)
    # store the shuffle flag through its setter
    self.set_shuffle(shuffle=shuffle)
Example #29
Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0 | 5 votes |
def set_shuffle(self, shuffle):
    """
    Set whether to shuffle moves generator.

    :Parameters:
        #. shuffle (boolean): Whether to shuffle generator instances at
           every move or to combine moves in the list order.
    """
    # LOGGER.error(...) is the assertion message, so it only runs when the check fails
    assert isinstance(shuffle, bool), LOGGER.error("shuffle must be boolean")
    self.__shuffle = shuffle
Example #30
Source File: 19_char_rnn.py From deep-learning-note with MIT License | 5 votes |
def read_data(filename, vocab, window, overlap):
    """Endlessly yield fixed-length encoded chunks from the lines of a file.

    The stripped lines of ``filename`` are read once. They are then shuffled
    and walked through repeatedly, forever: each line is encoded with
    ``vocab_encode(text, vocab)`` and cut into chunks of ``window`` items
    whose start positions advance by ``overlap``.

    Args:
        filename: path of the text file to read.
        vocab: vocabulary handed to ``vocab_encode``.
        window: number of items per chunk.
        overlap: step between the start positions of consecutive chunks.

    Yields:
        One chunk of ``window`` encoded items at a time.
    """
    # Read inside ``with`` so the file handle is closed once the lines are
    # in memory; it used to be left open for the generator's lifetime.
    with open(filename, 'r') as f:
        lines = [line.strip() for line in f]
    while True:
        random.shuffle(lines)
        for text in lines:
            text = vocab_encode(text, vocab)
            for start in range(0, len(text) - window, overlap):
                chunk = text[start: start + window]
                # Pad with zeros up to the window length. NOTE(review): with
                # the range above every chunk already looks full - confirm
                # whether shorter trailing chunks were intended.
                chunk += [0] * (window - len(chunk))
                yield chunk
# Read data in batches