Python Examples of random.shuffle

Source File: DataLoader_NER.py From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0

6 votes

def __init__(self, path, shuffle, config):
        """
        Record the loader settings; no data file is read here.

        :param path: data path list
        :param shuffle: shuffle bool, stored unchanged
        :param config: config object providing ``max_count`` and ``max_char_len``
        """
        print("Loading Data......")
        # Starts empty; filled elsewhere in the class.
        self.data_list = []
        # Values copied from the configuration object.
        self.max_count = config.max_count
        # Settings handed in by the caller.
        self.path, self.shuffle = path, shuffle
        # char feature: the padding symbol repeated twice
        self.pad_char = [char_pad] * 2
        self.max_char_len = config.max_char_len

Source File: estimator_utils.py From EDeN with MIT License

6 votes

def make_train_test_sets(pos_graphs, neg_graphs,
                         test_proportion=.3, random_state=2):
    """Split positive and negative graphs into shuffled train and test sets.

    Each class is shuffled and split on its own, so both sets receive the
    same proportion of positives and negatives. The combined train set and
    the combined test set are then passed, together with their targets,
    through ``paired_shuffle`` (defined elsewhere in this module).

    Parameters
    ----------
    pos_graphs, neg_graphs : iterable
        Graphs of the positive / negative class. They are copied, so the
        caller's sequences keep their original order.
    test_proportion : float
        Fraction of each class that goes to the test set. The number of
        test items per class is ``int(len(graphs) * test_proportion)``,
        clamped to ``[0, len(graphs)]``.
    random_state : hashable
        Seed given to ``random.seed``; note this reseeds the global RNG.

    Returns
    -------
    tuple
        ``(tr_graphs, tr_targets), (te_graphs, te_targets)`` where the
        targets are numpy arrays holding 1 for positives and 0 for negatives.
    """
    random.seed(random_state)

    def _shuffled_split(graphs):
        # Shuffle a copy rather than the caller's own list.
        graphs = list(graphs)
        random.shuffle(graphs)
        n_test = int(len(graphs) * test_proportion)
        n_test = min(max(n_test, 0), len(graphs))
        # Slice from the front. The negative-index form ``graphs[:-n_test]``
        # is wrong for n_test == 0: ``[:-0]`` is empty and ``[-0:]`` is the
        # whole list, which would put every graph in the test set.
        cut = len(graphs) - n_test
        return graphs[:cut], graphs[cut:]

    # Positives are shuffled before negatives so the RNG is consumed in a
    # fixed order for a given seed.
    tr_pos_graphs, te_pos_graphs = _shuffled_split(pos_graphs)
    tr_neg_graphs, te_neg_graphs = _shuffled_split(neg_graphs)

    tr_graphs = tr_pos_graphs + tr_neg_graphs
    te_graphs = te_pos_graphs + te_neg_graphs
    tr_targets = [1] * len(tr_pos_graphs) + [0] * len(tr_neg_graphs)
    te_targets = [1] * len(te_pos_graphs) + [0] * len(te_neg_graphs)
    tr_graphs, tr_targets = paired_shuffle(tr_graphs, tr_targets)
    te_graphs, te_targets = paired_shuffle(te_graphs, te_targets)
    return (tr_graphs, np.array(tr_targets)), (te_graphs, np.array(te_targets))

Source File: utils.py From deep-learning-note with MIT License

6 votes

def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    """Yield ``(X, Y)`` minibatches of randomly ordered subsequences.

    The corpus is cut into consecutive windows of ``num_steps`` items and
    the windows are visited in random order. ``Y`` holds the same windows
    as ``X`` shifted one position to the right. Both are created as
    ``float32`` tensors on ``device`` (CUDA when available if ``device``
    is None). Windows that do not fill a whole batch are dropped.
    """
    # Subtract 1 because every label window starts one position after its
    # input window.
    window_count = (len(corpus_indices) - 1) // num_steps
    order = list(range(window_count))
    random.shuffle(order)

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _window(start):
        # Sequence of length num_steps beginning at ``start``.
        return corpus_indices[start: start + num_steps]

    for batch_no in range(window_count // batch_size):
        # Take batch_size random windows for this batch.
        offset = batch_no * batch_size
        starts = [k * num_steps for k in order[offset: offset + batch_size]]
        inputs = [_window(s) for s in starts]
        labels = [_window(s + 1) for s in starts]
        yield (torch.tensor(inputs, dtype=torch.float32, device=device),
               torch.tensor(labels, dtype=torch.float32, device=device))

Source File: detection.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def __init__(self, batch_size, data_shape,
                 path_imgrec=None, path_imglist=None, path_root=None, path_imgidx=None,
                 shuffle=False, part_index=0, num_parts=1, aug_list=None, imglist=None,
                 data_name='data', label_name='label', **kwargs):
        """Initialise the parent iterator, then set up detection augmenters
        and the label shape.

        The parent initializer always receives an empty ``aug_list``; the
        augmenters actually used are stored on ``self.auglist``. When
        ``aug_list`` is None they are built by ``CreateDetAugmenter`` from
        ``data_shape`` and the extra keyword arguments.
        """
        parent_args = dict(batch_size=batch_size, data_shape=data_shape,
                           path_imgrec=path_imgrec, path_imglist=path_imglist,
                           path_root=path_root, path_imgidx=path_imgidx,
                           shuffle=shuffle, part_index=part_index,
                           num_parts=num_parts, aug_list=[], imglist=imglist,
                           data_name=data_name, label_name=label_name)
        super(ImageDetIter, self).__init__(**parent_args)

        self.auglist = CreateDetAugmenter(data_shape, **kwargs) if aug_list is None else aug_list

        # go through all labels to get the proper label shape
        estimated = self._estimate_label_shape()
        label_desc = (label_name, (self.batch_size, estimated[0], estimated[1]))
        self.provide_label = [label_desc]
        self.label_shape = estimated

Source File: test_recordio.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def test_indexed_recordio():
    """Round-trip 255 one-character records through ``MXIndexedRecordIO``.

    Records are written under integer keys 0..N-1, the reader's key list is
    checked, and the records are then read back in random key order.
    """
    import os
    import shutil

    def expected_payload(i):
        # On Python 2 ``str`` is already a byte string; on Python 3 the
        # character has to be encoded explicitly.
        if sys.version_info[0] < 3:
            return str(chr(i))
        return bytes(str(chr(i)), 'utf-8')

    # tempfile.mktemp() only returns a name, which is race-prone, and the
    # files were never removed. A private temporary directory avoids both.
    workdir = tempfile.mkdtemp()
    try:
        fidx = os.path.join(workdir, 'records.idx')
        frec = os.path.join(workdir, 'records.rec')
        N = 255

        writer = mx.recordio.MXIndexedRecordIO(fidx, frec, 'w')
        for i in range(N):
            writer.write_idx(i, expected_payload(i))
        # NOTE(review): dropping the writer is presumably what flushes and
        # closes the files before they are reopened - confirm.
        del writer

        reader = mx.recordio.MXIndexedRecordIO(fidx, frec, 'r')
        keys = reader.keys
        assert sorted(keys) == [i for i in range(N)]
        random.shuffle(keys)
        for i in keys:
            res = reader.read_idx(i)
            assert res == expected_payload(i)
        # Release the reader before the directory is deleted.
        del reader
    finally:
        shutil.rmtree(workdir, ignore_errors=True)

Source File: BasePythonDataLayer.py From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License

6 votes

def preload_db(self):
        """Read all images in and all labels

        Implementation relies on DataManager classes: the manager matching
        ``self._source_type`` ('BCF', 'CSV' or 'LMDB') is created from
        ``self._layer_params`` and its ``load_all()`` result is stored in
        ``self._data`` / ``self._label``. ``self._sample_count`` is set to
        the number of data items, and ``self.shuffle()`` is called when
        ``self._shuffle`` is truthy.

        Raises:
            ValueError: if ``self._source_type`` is not a supported value.
        """
        print("Preloading Data...")
        if self._source_type == 'BCF':
            self._data_manager = BCFDataManager(self._layer_params)
        elif self._source_type == 'CSV':
            self._data_manager = CSVDataManager(self._layer_params)
        elif self._source_type == 'LMDB':
            self._data_manager = LMDBDataManager(self._layer_params)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # ``_data_manager`` a line later.
            raise ValueError(
                "Unsupported source_type: {!r}".format(self._source_type))
        # read all data
        self._data, self._label = self._data_manager.load_all()
        self._sample_count = len(self._data)
        if self._shuffle:
            self.shuffle()

Source File: BasePythonDataLayer.py From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License

6 votes

def setup(self, bottom, top):
        """Configure the layer from its YAML parameter string and preload data.

        ``self.param_str`` is parsed as YAML; the recognised keys and their
        defaults are ``batch_size`` (256), ``resize`` (-1), ``mean_file``
        (None), ``source_type`` ('CSV'), ``shuffle`` (False) and
        ``compressed`` (True). ``bottom`` and ``top`` are not used here.
        """
        # safe_load instead of yaml.load: calling yaml.load() without an
        # explicit Loader is deprecated since PyYAML 5.1 and rejected by
        # PyYAML 6, and the old default loader could build arbitrary Python
        # objects from the parameter string.
        layer_params = yaml.safe_load(self.param_str)
        self._layer_params = layer_params
        # default batch_size = 256
        self._batch_size = int(layer_params.get('batch_size', 256))
        self._resize = layer_params.get('resize', -1)
        self._mean_file = layer_params.get('mean_file', None)
        self._source_type = layer_params.get('source_type', 'CSV')
        self._shuffle = layer_params.get('shuffle', False)
        # read image_mean from file and preload all data into memory
        # will read either file or array into self._mean
        self.set_mean()
        self.preload_db()
        # Data is treated as compressed unless the parameters say otherwise.
        self._compressed = self._layer_params.get('compressed', True)
        if not self._compressed:
            self.decompress_data()

Source File: 30_series_sampling.py From deep-learning-note with MIT License

6 votes

def data_iter_random(corpus_indices, batch_size, num_steps, device=None):
    """Generate random-sampling minibatches ``(X, Y)`` from an index sequence.

    ``X`` rows are non-overlapping slices of ``num_steps`` indices taken in
    shuffled order; ``Y`` rows are the same slices moved forward by one
    index. Both tensors are ``float32`` and placed on ``device``, which
    defaults to CUDA when available and the CPU otherwise. Only full
    batches are produced.
    """
    # The -1 is there because each target index is its input index plus 1.
    total_slices = (len(corpus_indices) - 1) // num_steps
    batches_per_epoch = total_slices // batch_size
    shuffled_ids = list(range(total_slices))
    random.shuffle(shuffled_ids)

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _slice_from(pos):
        # The num_steps-long sequence starting at ``pos``.
        return corpus_indices[pos: pos + num_steps]

    for b in range(batches_per_epoch):
        # Read batch_size random samples each time.
        chosen = shuffled_ids[b * batch_size: (b + 1) * batch_size]
        X = [_slice_from(idx * num_steps) for idx in chosen]
        Y = [_slice_from(idx * num_steps + 1) for idx in chosen]
        x_tensor = torch.tensor(X, dtype=torch.float32, device=device)
        y_tensor = torch.tensor(Y, dtype=torch.float32, device=device)
        yield x_tensor, y_tensor

Source File: utils.py From deep-learning-note with MIT License

6 votes

def load_data_fashion_mnist(batch_size, resize=None, root='./data'):
    """Download the fashion mnist dataset and then load into memory.

    Returns a ``(train_iter, test_iter)`` pair of ``DataLoader`` objects.
    Only the training loader shuffles. When ``resize`` is truthy the images
    are resized before being converted to tensors.
    """
    tv_transforms = torchvision.transforms
    pipeline = [tv_transforms.Resize(size=resize)] if resize else []
    pipeline.append(tv_transforms.ToTensor())
    transform = tv_transforms.Compose(pipeline)

    def _fashion_mnist(train):
        return torchvision.datasets.FashionMNIST(
            root=root, train=train, download=True, transform=transform)

    mnist_train = _fashion_mnist(True)
    mnist_test = _fashion_mnist(False)

    # 0 means no extra worker processes are used to speed up data loading.
    num_workers = 0 if sys.platform.startswith('win') else 4

    def _loader(dataset, shuffle):
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    return _loader(mnist_train, True), _loader(mnist_test, False)

Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def get_caltech101_iterator(batch_size, num_workers, dtype):
    """Build train and test iterators over the Caltech-101 image folders.

    Returns ``(DataLoaderIter(train), DataLoaderIter(test))``; only the
    training loader shuffles. ``dtype`` is accepted but not used here.
    """
    def transform(image, label):
        # Resize so the shorter edge is 224 (the longer edge stays >= 224),
        # then cut a centered 224x224 patch.
        short_side_224 = mx.image.resize_short(image, 224)
        patch, _crop_info = mx.image.center_crop(short_side_224, (224, 224))
        # Move the last axis to the front, giving (3,224,224) per the
        # original comment.
        return mx.nd.transpose(patch, (2, 0, 1)), label

    training_path, testing_path = get_caltech101_data()
    dataset_train = ImageFolderDataset(root=training_path, transform=transform)
    dataset_test = ImageFolderDataset(root=testing_path, transform=transform)

    loaders = (
        DataLoader(dataset_train, batch_size, shuffle=True, num_workers=num_workers),
        DataLoader(dataset_test, batch_size, shuffle=False, num_workers=num_workers),
    )
    train_iter = DataLoaderIter(loaders[0])
    test_iter = DataLoaderIter(loaders[1])
    return train_iter, test_iter

Source File: atomic.py From comet-commonsense with Apache License 2.0

6 votes

def shuffle_sequences(self, split="train", keys=None):
        """Shuffle the examples of ``split`` in place, key by key.

        For every key one random permutation is drawn and applied to the
        sequence tensor (along dimension 0), the data list and the mask list,
        so the three stay aligned. ``keys`` defaults to every key of
        ``self.data[split]``.
        """
        if keys is None:
            keys = self.data[split].keys()

        for key in keys:
            examples = self.data[split][key]
            order = list(range(len(examples)))
            random.shuffle(order)

            permuted = self.sequences[split][key].index_select(
                0, torch.LongTensor(order))
            self.sequences[split][key] = permuted

            self.data[split][key] = [examples[i] for i in order]
            masks = self.masks[split][key]
            self.masks[split][key] = [masks[i] for i in order]

Source File: conceptnet.py From comet-commonsense with Apache License 2.0

6 votes

def shuffle_sequences(self, split="train", keys=None):
        """Shuffle the examples of ``split`` in place, key by key.

        The keys "positive" and "negative" are left untouched. For every
        other key one random permutation is drawn and applied to the
        sequence tensor (dimension 0), the data list and the mask list.
        ``keys`` defaults to every key of ``self.data[split]``.
        """
        if keys is None:
            keys = self.data[split].keys()

        skipped = ("positive", "negative")
        for key in keys:
            if key in skipped:
                continue

            examples = self.data[split][key]
            order = list(range(len(examples)))
            random.shuffle(order)

            permuted = self.sequences[split][key].index_select(
                0, torch.LongTensor(order))
            self.sequences[split][key] = permuted

            self.data[split][key] = [examples[i] for i in order]

            masks = self.masks[split][key]
            self.masks[split][key] = [masks[i] for i in order]

Source File: DataLoader_NER.py From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0

6 votes

def dataLoader(self):
        """
        Load each file listed in ``self.path``, shuffle its instances, write
        the shuffled instances back through ``_write_shuffle_inst_to_file``
        and append them to ``self.data_list``.

        :return: the three loaded sets when ``self.data_list`` holds three,
                 the two loaded sets when it holds two, otherwise None
        """
        paths = self.path
        shuffle_flag = self.shuffle
        assert isinstance(paths, list), "Path Must Be In List"
        print("Data Path {}".format(paths))
        for data_path in paths:
            print("Loading Data Form {}".format(data_path))
            insts = self._Load_Each_Data(path=data_path, shuffle=shuffle_flag)
            # Instances are shuffled here regardless of the shuffle flag.
            random.shuffle(insts)
            self._write_shuffle_inst_to_file(insts, path=data_path)
            self.data_list.append(insts)
        # return train/dev/test data
        if len(self.data_list) in (2, 3):
            return tuple(self.data_list)
        return None

Source File: data.py From VSE-C with MIT License

6 votes

def get_loader_single(data_name, split, root, json, vocab, transform,
                      batch_size=100, shuffle=True,
                      num_workers=2, ids=None, collate_fn=collate_fn):
    """Returns torch.utils.data.DataLoader for a custom COCO or Flickr dataset.

    The dataset class is chosen from ``data_name``: names containing
    'coco' use ``CocoDataset`` (which receives ``ids`` but not ``split``),
    names containing 'f8k' or 'f30k' use ``FlickrDataset`` (which receives
    ``split`` but not ``ids``).

    Raises:
        ValueError: if ``data_name`` matches none of the supported datasets.
    """
    if 'coco' in data_name:
        # COCO custom dataset
        dataset = CocoDataset(root=root,
                              json=json,
                              vocab=vocab,
                              transform=transform, ids=ids)
    elif 'f8k' in data_name or 'f30k' in data_name:
        dataset = FlickrDataset(root=root,
                                split=split,
                                json=json,
                                vocab=vocab,
                                transform=transform)
    else:
        # Without this branch ``dataset`` stays unbound and the DataLoader
        # call below fails with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported dataset name: {!r}".format(data_name))

    # Data loader
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              pin_memory=True,
                                              num_workers=num_workers,
                                              collate_fn=collate_fn)
    return data_loader

Source File: chainer_alex.py From mlimages with MIT License

6 votes

def show(limit, shuffle=True):
    """Display up to ``limit`` training images, printing each image path.

    A non-positive ``limit`` falls back to 5. With ``shuffle`` the whole
    generator is first read into a list and shuffled.
    """
    td = TrainingData(LABEL_FILE, img_root=IMAGES_ROOT, mean_image_file=MEAN_IMAGE_FILE, image_property=IMAGE_PROP)
    max_shown = limit if limit > 0 else 5
    samples = td.generate()
    if shuffle:
        import random
        materialized = list(samples)
        random.shuffle(materialized)
        samples = iter(materialized)

    for shown, (arr, im) in enumerate(samples, 1):
        restored = td.data_to_image(arr, im.label, raw=True)
        print(im.path)
        restored.image.show()
        if shown >= max_shown:
            break

Source File: concat_db.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def _load_image_set_index(self, shuffle):
        """
        Count the images of all member databases and create the index list.

        ``self.num_images`` is set to the sum of ``num_images`` over
        ``self.imdbs``.

        Parameters
        ----------
        shuffle : bool
            whether to shuffle the initial indices

        Returns
        -------
        list of int
            the indices ``0 .. self.num_images - 1``, shuffled on request
        """
        self.num_images = 0
        self.num_images = sum((db.num_images for db in self.imdbs), self.num_images)
        image_ids = list(range(self.num_images))
        if not shuffle:
            return image_ids
        random.shuffle(image_ids)
        return image_ids

Source File: iterators.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def reset(self):
        """Resets the iterator to the beginning of the data.

        ``self.idx`` is shuffled in place. Within every bucket the indices,
        sentences, characters and labels are passed together through
        ``shuffle``, and the ``nd*`` array lists are then rebuilt from the
        result.
        """
        self.curr_idx = 0
        random.shuffle(self.idx)

        bucket_ids = range(len(self.sentences))

        # shuffle data in each bucket
        for b in bucket_ids:
            shuffled = shuffle(self.indices[b],
                               self.sentences[b],
                               self.characters[b],
                               self.label[b])
            self.indices[b], self.sentences[b], self.characters[b], self.label[b] = shuffled

        self.ndindex, self.ndsent, self.ndchar, self.ndlabel = [], [], [], []

        # for each bucket, append one array per field
        for b in bucket_ids:
            for target, source in ((self.ndindex, self.indices),
                                   (self.ndsent, self.sentences),
                                   (self.ndchar, self.characters),
                                   (self.ndlabel, self.label)):
                target.append(ndarray.array(source[b], dtype=self.dtype))

Source File: turing.py From gated-graph-transformer-network with MIT License

6 votes

def encode_turing_machine_rules(rules, starting_state=None, story=None):
    """Add the transition rules of a Turing machine to a story and its graph.

    ``rules[cstate][read]`` is a ``(write, nstate, direc)`` triple. Every
    rule becomes an edge between two ``state_<n>`` nodes plus one "rule"
    line in the story; the rules are added in random order. Finally a
    ``head`` node is pointed at the starting state and a "start" line is
    added. Returns the story (a new one when ``story`` is None).
    """
    story = graph_tools.Story() if story is None else story
    graph = story.graph
    if starting_state is None:
        # NOTE(review): choice() receives an int here, which only works if
        # ``random`` is numpy's random module - confirm against the imports.
        starting_state = random.choice(len(rules))

    transitions = []
    for cstate, per_symbol in enumerate(rules):
        for read, (write, nstate, direc) in enumerate(per_symbol):
            transitions.append((cstate, read, write, nstate, direc))
    random.shuffle(transitions)

    for cstate, read, write, nstate, direc in transitions:
        source = graph.make_unique('state_{}'.format(cstate))
        dest = graph.make_unique('state_{}'.format(nstate))
        source["rule_{}_{}_{}".format(read,write,direc)] = dest
        rule_line = "rule {} {} {} {} {}".format(source.type, read, write, dest.type, direc)
        story.add_line(rule_line)

    head = graph.make_unique('head')
    head.state = graph.make_unique('state_{}'.format(starting_state))
    story.add_line("start {}".format(head.state.type))
    return story

Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0

6 votes

def move(self, coordinates):
        """
        Move coordinates.

        Every generator of the combination is applied in turn, each one
        receiving the coordinates returned by the previous one. When the
        shuffle flag is set the application order is randomized per call.

        :Parameters:
            #. coordinates (np.ndarray): The coordinates on which to apply
               the transformation.

        :Returns:
            #. coordinates (np.ndarray): The new coordinates after applying
               the transformation.
        """
        # Build a real list: shuffle() needs a mutable sequence, and on
        # Python 3 range() returns an immutable range object.
        indexes = list(range(len(self.__combination)))
        if self.__shuffle:
            shuffle( indexes )
        # create the move combination
        for idx in indexes:
            coordinates = self.__combination[idx].move(coordinates)
        return coordinates

Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def get_imagenet_iterator(root, batch_size, num_workers, data_shape=224, dtype='float32'):
    """Dataset loader with preprocessing.

    Builds loaders over ``<root>/train`` (shuffled, incomplete last batch
    discarded) and ``<root>/val`` (last batch kept) and returns them wrapped
    as ``(DataLoaderIter(train, dtype), DataLoaderIter(val, dtype))``.

    Raises ValueError when ``<root>/val/n01440764`` is not a directory,
    i.e. the validation images are not stored one sub-directory per category.
    """
    loading_msg = "Loading image folder %s, this may take a bit long..."

    train_dir = os.path.join(root, 'train')
    train_transform, val_transform = get_imagenet_transforms(data_shape, dtype)
    logging.info(loading_msg, train_dir)
    train_data = DataLoader(ImageFolderDataset(train_dir, transform=train_transform),
                            batch_size, shuffle=True,
                            last_batch='discard', num_workers=num_workers)

    val_dir = os.path.join(root, 'val')
    # One known category directory serves as the layout check.
    probe_dir = os.path.expanduser(os.path.join(root, 'val', 'n01440764'))
    if not os.path.isdir(probe_dir):
        raise ValueError('Make sure validation images are stored in one subdir per category, a helper script is available at https://git.io/vNQv1')
    logging.info(loading_msg, val_dir)
    val_data = DataLoader(ImageFolderDataset(val_dir, transform=val_transform),
                          batch_size, last_batch='keep', num_workers=num_workers)

    return DataLoaderIter(train_data, dtype), DataLoaderIter(val_data, dtype)

Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0

5 votes

def _codify__(self, name='generator', group=None, addDependencies=True):
        """
        Build Python source code that re-creates this combinator.

        Each generator of the combination is codified under the variable
        name ``<name>_<index>``, followed by one line that instantiates
        ``MoveGenerator.MoveGeneratorCombinator`` from those variables.

        :Parameters:
            #. name (string): Variable name given to the combinator in the
               generated code. Must match ``[a-zA-Z_][a-zA-Z0-9_]*``.
            #. group: Value formatted into the ``group=`` argument of the
               generated constructor call.
            #. addDependencies (boolean): Whether to put the dependency
               lines, followed by an empty line, before the generated code.

        :Returns:
            #. dependencies (list): The collected dependency lines, in
               first-seen order and without duplicates.
            #. code (string): The generated code lines joined by newlines.
        """
        # NOTE: the assert messages are LOGGER.error(...) calls, so the error
        # is logged only when the corresponding check fails.
        assert isinstance(name, basestring), LOGGER.error("name must be a string")
        assert re.match('[a-zA-Z_][a-zA-Z0-9_]*$', name) is not None, LOGGER.error("given name '%s' can't be used as a variable name"%name)
        # an ordered dict is used as an ordered set of dependency lines
        dependencies = collections.OrderedDict()
        dependencies['from fullrmc.Core import MoveGenerator'] = True
        code         = []
        combination  = []
        # codify generators
        for idx, gen in enumerate(self.__combination):
            nm      = '%s_%i'%(name,idx)
            # sub-generators are codified with group=None and with their own
            # dependencies included in the returned code
            dep, cd = gen._codify__(group=None, name=nm, addDependencies=True)
            code.append(cd)
            combination.append(nm)
            # merge the sub-generator dependencies, keeping the first occurrence
            for d in dep:
                _ = dependencies.setdefault(d,True)
        # codify combinator
        code.append("{name} = MoveGenerator.MoveGeneratorCombinator\
(group={group}, combination=[{combination}], shuffle={shuffle})"
.format(name=name, group=group, combination=', '.join(combination), shuffle=self.shuffle))
        # set dependencies: keep only the keys, as a list
        dependencies = list(dependencies)
        # add dependencies
        if addDependencies:
            code = dependencies + [''] + code
        # return
        return dependencies, '\n'.join(code)

Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0

5 votes

def shuffle(self):
        """Shuffle flag: returns the value held in the private ``__shuffle`` attribute.

        NOTE(review): presumably exposed as a property (the decorator is not
        visible in this excerpt) - confirm.
        """
        return self.__shuffle

Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def iterate_forever(self, batch_size, num_steps):
        """Yield batches endlessly, cycling over the matching files.

        The file pattern is re-globbed at the start of every pass. The file
        order is randomized only when the dataset was created with
        ``shuffle=True``. Batches come from ``self._iterate`` applied to the
        sentence stream of those files.
        """
        def file_stream():
            # NOTE: if the pattern matches no file, this loop never yields.
            while True:
                file_patterns = glob.glob(self._file_pattern)
                # Shuffle only when requested; the previous
                # ``if not self._shuffle`` test was inverted.
                if self._shuffle:
                    random.shuffle(file_patterns)
                for file_name in file_patterns:
                    yield file_name
        for value in self._iterate(self._sentence_stream(file_stream()), batch_size, num_steps):
            yield value

Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def iterate_once(self, batch_size, num_steps):
        """Yield batches for a single pass over the matching files.

        The file order is randomized only when the dataset was created with
        ``shuffle=True``. Batches come from ``self._iterate`` applied to the
        sentence stream of those files.
        """
        def file_stream():
            file_patterns = glob.glob(self._file_pattern)
            # Shuffle only when requested; the previous
            # ``if not self._shuffle`` test was inverted.
            if self._shuffle:
                random.shuffle(file_patterns)
            for file_name in file_patterns:
                yield file_name
        for value in self._iterate(self._sentence_stream(file_stream()), batch_size, num_steps):
            yield value

Source File: imdb.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def save_imglist(self, fname=None, root=None, shuffle=False):
        """
        save imglist to disk

        One tab-separated line is written per image that has a non-empty
        label: the index, the constant 2, the label width (``shape[1]``),
        the flattened label values with four decimals, and the image path.

        Parameters:
        ----------
        fname : str
            saved filename; defaults to ``self.name + '.lst'``
        root : str
            when given, image paths are written relative to this directory
        shuffle : bool
            whether to shuffle the lines before writing

        Raises:
        ----------
        RuntimeError
            when no image has a non-empty label
        """
        def progress_bar(count, total, suffix=''):
            # Redraw a 24-character text progress bar on the current line.
            import sys
            bar_len = 24
            filled_len = int(round(bar_len * count / float(total)))
            percents = round(100.0 * count / float(total), 1)
            bar = '=' * filled_len + '-' * (bar_len - filled_len)
            sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
            sys.stdout.flush()

        entries = []
        for index in range(self.num_images):
            progress_bar(index, self.num_images)
            label = self.label_from_index(index)
            if label.size >= 1:
                path = self.image_path_from_index(index)
                if root:
                    path = osp.relpath(path, root)
                fields = [str(index), '2', str(label.shape[1])]
                fields.extend("{0:.4f}".format(x) for x in label.ravel())
                fields.append(path)
                entries.append('\t'.join(fields) + '\n')

        if not entries:
            raise RuntimeError("No image in imdb")

        if shuffle:
            import random
            random.shuffle(entries)
        if not fname:
            fname = self.name + '.lst'
        with open(fname, 'w') as f:
            f.writelines(entries)

Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def _parse_file(self, file_name):
        """Yield the parsed sentences of one UTF-8 text file.

        All lines are read and stripped before anything is yielded. Their
        order is randomized only when the dataset was created with
        ``shuffle=True``. Each line is passed through
        ``self._parse_sentence``.
        """
        # Lazy %-style arguments: the message is formatted only if DEBUG
        # logging is enabled.
        logging.debug("Processing file: %s", file_name)
        with codecs.open(file_name, "r", "utf-8") as f:
            lines = [line.strip() for line in f]
            # Shuffle only when requested; the previous
            # ``if not self._shuffle`` test was inverted.
            if self._shuffle:
                random.shuffle(lines)
            logging.debug("Finished processing!")
            for line in lines:
                yield self._parse_sentence(line)

Source File: data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

5 votes

def __init__(self, vocab, file_pattern, shuffle=False):
        """Store the vocabulary, the file glob pattern and the shuffle flag
        unchanged on the instance."""
        self._vocab, self._file_pattern, self._shuffle = vocab, file_pattern, shuffle

Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0

5 votes

def __init__(self, group=None, combination=None, shuffle=False):
        # Start with an empty combination, assigned before the parent
        # initializer runs.
        # NOTE(review): presumably the parent initializer reads the
        # combination - confirm before reordering these statements.
        self.__combination = []
        # initialize the parent class with the group
        super(MoveGeneratorCombinator, self).__init__(group=group)
        # set combination
        self.set_combination(combination=combination)
        # set shuffle flag
        self.set_shuffle(shuffle=shuffle)

Source File: MoveGenerator.py From fullrmc with GNU Affero General Public License v3.0

5 votes

def set_shuffle(self, shuffle):
        """
        Set whether to shuffle moves generator.

        :Parameters:
            #. shuffle (boolean): Whether to shuffle generator instances at
               every move or to combine moves in the list order.
        """
        # NOTE: validation is done with assert, so it is skipped under
        # ``python -O``; the error is logged only when the check fails.
        assert isinstance(shuffle, bool), LOGGER.error("shuffle must be boolean")
        self.__shuffle = shuffle

Source File: 19_char_rnn.py From deep-learning-note with MIT License

5 votes

def read_data(filename, vocab, window, overlap):
    """Endlessly yield encoded chunks of ``window`` items from a text file.

    The stripped lines of ``filename`` are reshuffled before every pass.
    Each line is encoded with ``vocab_encode`` and cut into chunks whose
    start positions advance by ``overlap`` (it is used as the ``range``
    step). A line yields nothing unless it is longer than ``window``.

    The generator never ends on its own, except for an empty file, for
    which it ends immediately.
    """
    # Read through a context manager so the file handle is closed; the
    # original bare open() never closed it.
    with open(filename, 'r') as source:
        lines = [line.strip() for line in source]

    # With no lines the loop below would spin forever without yielding.
    if not lines:
        return

    while True:
        random.shuffle(lines)

        for text in lines:
            text = vocab_encode(text, vocab)
            for start in range(0, len(text) - window, overlap):
                chunk = text[start: start + window]
                # Zero-pad short chunks; with the range bounds above the
                # slice is already full-length, so this is a safety net.
                chunk += [0] * (window - len(chunk))
                yield chunk


# Read data in batches

Python random.shuffle() Examples