Python fuel.schemes.ShuffledScheme() Examples

The following are 16 code examples of fuel.schemes.ShuffledScheme(), taken from open-source projects; the source file and project for each one are noted above the example. You may also want to check out all available functions/classes of the module fuel.schemes.
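Before the project examples, here is a minimal, self-contained sketch of the typical usage pattern (the dataset and numbers are illustrative only, not taken from any example below). ShuffledScheme takes a number of examples (or an explicit list of indices) and a batch size, and yields the example indices in a freshly shuffled order every epoch.

from collections import OrderedDict

import numpy
from fuel.datasets import IndexableDataset
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream

# Ten toy examples with a single 'features' source.
dataset = IndexableDataset(OrderedDict([('features', numpy.arange(10))]))

# Shuffle all example indices once per epoch and slice them into batches of 4.
scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=4)
stream = DataStream(dataset, iteration_scheme=scheme)

for batch in stream.get_epoch_iterator():
    print(batch)  # e.g. (array([3, 7, 0, 9]),); the order changes each epoch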
Example #1
Source File: load.py    From iGAN with MIT License
def load_imgs(ntrain=None, ntest=None, batch_size=128, data_file=None):
    t = time()
    print('LOADING DATASET...')
    path = os.path.join(data_file)
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))

    if ntrain is None:
        ntrain = tr_data.num_examples
    else:
        ntrain = min(ntrain, tr_data.num_examples)

    if ntest is None:
        ntest = te_data.num_examples
    else:
        ntest = min(ntest, te_data.num_examples)
    print('name = %s, ntrain = %d, ntest = %d' % (data_file, ntrain, ntest))

    tr_scheme = ShuffledScheme(examples=ntrain, batch_size=batch_size)
    tr_stream = DataStream(tr_data, iteration_scheme=tr_scheme)

    te_scheme = ShuffledScheme(examples=ntest, batch_size=batch_size)
    te_stream = DataStream(te_data, iteration_scheme=te_scheme)
    print('%.2f secs to load data' % (time() - t))
    return tr_data, te_data, tr_stream, te_stream, ntrain, ntest 
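The values returned above are ordinary Fuel data streams; a hypothetical caller (the file name and epoch count below are placeholders, not from the original project) would iterate them roughly like this:

# Hypothetical usage of load_imgs(); 'images.hdf5' is a placeholder path.
tr_data, te_data, tr_stream, te_stream, ntrain, ntest = load_imgs(
    batch_size=128, data_file='images.hdf5')

for epoch in range(25):
    # Each epoch visits every training example once, in a new random order.
    for batch in tr_stream.get_epoch_iterator():
        images = batch[0]  # which sources appear, and in what order, depends on the HDF5 file
        # ... training step on `images` goes here ...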
Example #2
Source File: load.py    From dcgan_code with MIT License
def faces(ntrain=None, nval=None, ntest=None, batch_size=128):
    path = os.path.join(data_dir, 'faces_364293_128px.hdf5')
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))

    if ntrain is None:
        ntrain = tr_data.num_examples
    if ntest is None:
        ntest = te_data.num_examples
    if nval is None:
        nval = te_data.num_examples

    tr_scheme = ShuffledScheme(examples=ntrain, batch_size=batch_size)
    tr_stream = DataStream(tr_data, iteration_scheme=tr_scheme)

    te_scheme = SequentialScheme(examples=ntest, batch_size=batch_size)
    te_stream = DataStream(te_data, iteration_scheme=te_scheme)

    val_scheme = SequentialScheme(examples=nval, batch_size=batch_size)
    val_stream = DataStream(tr_data, iteration_scheme=val_scheme)
    return tr_data, te_data, tr_stream, val_stream, te_stream 
Example #3
Source File: utils.py    From blocks-char-rnn with MIT License
def get_stream(hdf5_file, which_set, batch_size=None):
    dataset = H5PYDataset(
        hdf5_file, which_sets=(which_set,), load_in_memory=True)
    if batch_size is None:
        batch_size = dataset.num_examples
    stream = DataStream(dataset=dataset, iteration_scheme=ShuffledScheme(
        examples=dataset.num_examples, batch_size=batch_size))
    # Required because Recurrent bricks receive as input [sequence, batch,
    # features]
    return Mapping(stream, transpose_stream) 
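The transpose_stream helper used in the Mapping above is defined elsewhere in that project; a plausible minimal version, assuming it only needs to swap the batch and time axes of every source so that batches arrive as [sequence, batch, features], would be:

def transpose_stream(data):
    # `data` is the tuple of arrays for one batch; swap axes 0 and 1 of each.
    return tuple(array.swapaxes(0, 1) for array in data)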
Example #4
Source File: test_datasets.py    From attention-lvcsr with MIT License
def test_batch_iteration_scheme_with_lists(self):
        """Batch schemes should work with more than ndarrays."""
        data = IndexableDataset(OrderedDict([('foo', list(range(50))),
                                             ('bar', list(range(1, 51)))]))
        stream = DataStream(data,
                            iteration_scheme=ShuffledScheme(data.num_examples,
                                                            5))
        returned = [sum(batches, []) for batches in
                    zip(*list(stream.get_epoch_iterator()))]
        assert set(returned[0]) == set(range(50))
        assert set(returned[1]) == set(range(1, 51)) 
Example #5
Source File: test_image.py    From attention-lvcsr with MIT License
def common_setup(self):
        ex_scheme = SequentialExampleScheme(self.dataset.num_examples)
        self.example_stream = DataStream(self.dataset,
                                         iteration_scheme=ex_scheme)
        self.batch_size = 2
        scheme = ShuffledScheme(self.dataset.num_examples,
                                batch_size=self.batch_size)
        self.batch_stream = DataStream(self.dataset, iteration_scheme=scheme) 
Example #6
Source File: dataset.py    From kerosene with MIT License
def fuel_data_to_list(fuel_data, shuffle):
    if shuffle:
        scheme = ShuffledScheme(fuel_data.num_examples, fuel_data.num_examples)
    else:
        scheme = SequentialScheme(fuel_data.num_examples, fuel_data.num_examples)
    fuel_data_stream = DataStream.default_stream(fuel_data, iteration_scheme=scheme)
    return next(fuel_data_stream.get_epoch_iterator()) 
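Assuming one of Fuel's built-in datasets has already been downloaded (MNIST is used here purely as an illustration), the helper above pulls an entire split into memory as a single shuffled batch:

from fuel.datasets import MNIST

train_set = MNIST(which_sets=('train',))
features, targets = fuel_data_to_list(train_set, shuffle=True)
print(features.shape, targets.shape)  # e.g. (60000, 1, 28, 28) (60000, 1)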
Example #7
Source File: test_datasets.py    From fuel with MIT License
def test_batch_iteration_scheme_with_lists(self):
        """Batch schemes should work with more than ndarrays."""
        data = IndexableDataset(OrderedDict([('foo', list(range(50))),
                                             ('bar', list(range(1, 51)))]))
        stream = DataStream(data,
                            iteration_scheme=ShuffledScheme(data.num_examples,
                                                            5))
        returned = [sum(batches, []) for batches in
                    zip(*list(stream.get_epoch_iterator()))]
        assert set(returned[0]) == set(range(50))
        assert set(returned[1]) == set(range(1, 51)) 
Example #8
Source File: test_image.py    From fuel with MIT License
def common_setup(self):
        ex_scheme = SequentialExampleScheme(self.dataset.num_examples)
        self.example_stream = DataStream(self.dataset,
                                         iteration_scheme=ex_scheme)
        self.batch_size = 2
        scheme = ShuffledScheme(self.dataset.num_examples,
                                batch_size=self.batch_size)
        self.batch_stream = DataStream(self.dataset, iteration_scheme=scheme) 
Example #9
Source File: utils.py    From diagnose-heart with MIT License
def streamer(self, training=True, shuffled=False):
        n = self.ntrain if training else self.ntest
        if n == 0:
            return None
        func = ShuffledScheme if shuffled else SequentialScheme
        sch = func(examples=n, batch_size=self.batch_size)
        data = self.tr_data if training else self.te_data
        return DataStream(data, iteration_scheme=sch)

# helper function for building vae's 
Example #10
Source File: utils.py    From diagnose-heart with MIT License
def streamer(self, training=True, shuffled=False):
        n = self.ntrain if training else self.ntest
        sch = ShuffledScheme(examples=n, batch_size=self.batch_size) if shuffled \
            else SequentialScheme(examples=n, batch_size=self.batch_size)
        return DataStream(self.tr_data if training else self.te_data,
                          iteration_scheme=sch)

# helper function for building vae's 
Example #11
Source File: utils.py    From video_predict with MIT License
def streamer(self, training=True, shuffled=False):
        n = self.ntrain if training else self.ntest
        sch = ShuffledScheme(examples=n, batch_size=self.batch_size) if shuffled \
            else SequentialScheme(examples=n, batch_size=self.batch_size)
        return DataStream(self.tr_data if training else self.te_data,
                          iteration_scheme=sch)

# helper function for building vae's 
Example #12
Source File: streams.py    From PacGAN with MIT License
def create_packing_VEEGAN1200D_data_streams(num_packings, batch_size, monitoring_batch_size, rng=None, num_examples=100000, sources=('features', )):

    train_set = VEEGAN1200DPackingMixture(num_packings=num_packings, num_examples=num_examples, rng=rng, sources=sources)

    valid_set = VEEGAN1200DPackingMixture(num_packings=num_packings, num_examples=num_examples, rng=rng, sources=sources)

    main_loop_stream = DataStream(train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=batch_size, rng=rng))

    train_monitor_stream = DataStream(train_set, iteration_scheme=ShuffledScheme(5000, batch_size, rng=rng))

    valid_monitor_stream = DataStream(valid_set, iteration_scheme=ShuffledScheme(5000, batch_size, rng=rng))

    return main_loop_stream, train_monitor_stream, valid_monitor_stream 
Example #13
Source File: streams.py    From PacGAN with MIT License
def create_packing_gaussian_mixture_data_streams(num_packings, batch_size, monitoring_batch_size, means=None, variances=None, priors=None, rng=None, num_examples=100000, sources=('features', )):

    train_set = GaussianPackingMixture(num_packings=num_packings, num_examples=num_examples, means=means, variances=variances, priors=priors, rng=rng, sources=sources)

    valid_set = GaussianPackingMixture(num_packings=num_packings, num_examples=num_examples, means=means, variances=variances, priors=priors, rng=rng, sources=sources)

    main_loop_stream = DataStream(train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=batch_size, rng=rng))

    train_monitor_stream = DataStream(train_set, iteration_scheme=ShuffledScheme(5000, batch_size, rng=rng))

    valid_monitor_stream = DataStream(valid_set, iteration_scheme=ShuffledScheme(5000, batch_size, rng=rng))

    return main_loop_stream, train_monitor_stream, valid_monitor_stream 
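Both PacGAN examples pass an rng into every ShuffledScheme; seeding that numpy.random.RandomState makes the shuffling (and therefore the monitoring batches) reproducible across runs. A minimal sketch:

import numpy
from fuel.schemes import ShuffledScheme

rng = numpy.random.RandomState(2017)  # any fixed seed
scheme = ShuffledScheme(examples=1000, batch_size=100, rng=rng)

first_batch = next(scheme.get_request_iterator())
print(first_batch[:5])  # same indices on every run with the same seed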
Example #14
Source File: run.py    From ladder with MIT License
def make_datastream(dataset, indices, batch_size,
                    n_labeled=None, n_unlabeled=None,
                    balanced_classes=True, whiten=None, cnorm=None,
                    scheme=ShuffledScheme):
    if n_labeled is None or n_labeled == 0:
        n_labeled = len(indices)
    if batch_size is None:
        batch_size = len(indices)
    if n_unlabeled is None:
        n_unlabeled = len(indices)
    assert n_labeled <= n_unlabeled, 'need less labeled than unlabeled'

    if balanced_classes and n_labeled < n_unlabeled:
        # Ensure each label is equally represented
        logger.info('Balancing %d labels...' % n_labeled)
        all_data = dataset.data_sources[dataset.sources.index('targets')]
        y = unify_labels(all_data)[indices]
        n_classes = y.max() + 1
        assert n_labeled % n_classes == 0
        n_from_each_class = n_labeled // n_classes  # integer division; the result is used as a slice bound below

        i_labeled = []
        for c in range(n_classes):
            i = (indices[y == c])[:n_from_each_class]
            i_labeled += list(i)
    else:
        i_labeled = indices[:n_labeled]

    # Get unlabeled indices
    i_unlabeled = indices[:n_unlabeled]

    ds = SemiDataStream(
        data_stream_labeled=Whitening(
            DataStream(dataset),
            iteration_scheme=scheme(i_labeled, batch_size),
            whiten=whiten, cnorm=cnorm),
        data_stream_unlabeled=Whitening(
            DataStream(dataset),
            iteration_scheme=scheme(i_unlabeled, batch_size),
            whiten=whiten, cnorm=cnorm)
    )
    return ds 
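Note that scheme(i_labeled, batch_size) above passes explicit index arrays rather than an example count; ShuffledScheme (like SequentialScheme) accepts either form and then only visits the given indices, as in this small sketch:

from fuel.schemes import ShuffledScheme

labeled_indices = [3, 7, 11, 19, 23, 42]
scheme = ShuffledScheme(labeled_indices, batch_size=2)
print(list(scheme.get_request_iterator()))  # e.g. [[23, 3], [42, 11], [7, 19]]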
Example #15
Source File: fuel_helper.py    From plat with MIT License
def create_streams(train_set, valid_set, test_set, training_batch_size,
                   monitoring_batch_size):
    """Creates data streams for training and monitoring.

    Parameters
    ----------
    train_set : :class:`fuel.datasets.Dataset`
        Training set.
    valid_set : :class:`fuel.datasets.Dataset`
        Validation set.
    test_set : :class:`fuel.datasets.Dataset`
        Test set.
    training_batch_size : int
        Batch size for training.
    monitoring_batch_size : int
        Batch size for monitoring.

    Returns
    -------
    rval : tuple of data streams
        Data streams for the main loop, the training set monitor,
        the validation set monitor and the test set monitor.

    """
    main_loop_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, training_batch_size))
    train_monitor_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, monitoring_batch_size))
    valid_monitor_stream = DataStream.default_stream(
        dataset=valid_set,
        iteration_scheme=SequentialScheme(
            valid_set.num_examples, monitoring_batch_size))
    test_monitor_stream = DataStream.default_stream(
        dataset=test_set,
        iteration_scheme=SequentialScheme(
            test_set.num_examples, monitoring_batch_size))

    return (main_loop_stream, train_monitor_stream, valid_monitor_stream,
            test_monitor_stream) 
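A hypothetical call, assuming train_set, valid_set and test_set are already-constructed Fuel datasets (the variable names and batch sizes are placeholders). Note the design choice: the validation and test monitors use SequentialScheme, since evaluation does not benefit from shuffling.

streams = create_streams(train_set, valid_set, test_set,
                         training_batch_size=128, monitoring_batch_size=500)
main_loop_stream, train_monitor, valid_monitor, test_monitor = streams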
Example #16
Source File: utils.py    From discgen with MIT License
def create_streams(train_set, valid_set, test_set, training_batch_size,
                   monitoring_batch_size):
    """Creates data streams for training and monitoring.

    Parameters
    ----------
    train_set : :class:`fuel.datasets.Dataset`
        Training set.
    valid_set : :class:`fuel.datasets.Dataset`
        Validation set.
    test_set : :class:`fuel.datasets.Dataset`
        Test set.
    training_batch_size : int
        Batch size for training.
    monitoring_batch_size : int
        Batch size for monitoring.

    Returns
    -------
    rval : tuple of data streams
        Data streams for the main loop, the training set monitor,
        the validation set monitor and the test set monitor.

    """
    main_loop_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, training_batch_size))
    train_monitor_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, monitoring_batch_size))
    valid_monitor_stream = DataStream.default_stream(
        dataset=valid_set,
        iteration_scheme=ShuffledScheme(
            valid_set.num_examples, monitoring_batch_size))
    test_monitor_stream = DataStream.default_stream(
        dataset=test_set,
        iteration_scheme=ShuffledScheme(
            test_set.num_examples, monitoring_batch_size))

    return (main_loop_stream, train_monitor_stream, valid_monitor_stream,
            test_monitor_stream)