Python fuel.streams.DataStream() Examples
The following are 30 code examples of fuel.streams.DataStream().
Example #1
Source File: From iGAN with MIT License | 6 votes |
def load_imgs_seq(ntrain=None, ntest=None, batch_size=128, data_file=None):
    """Open an HDF5 dataset and build sequential train/test batch streams.

    Parameters
    ----------
    ntrain : int or None
        Number of training examples to iterate over. ``None`` means all
        of them; larger values are clamped to the size of the split.
    ntest : int or None
        Same as `ntrain`, for the test split.
    batch_size : int
        Batch size handed to both iteration schemes.
    data_file : str
        Path of the HDF5 file to open.

    Returns
    -------
    tuple
        ``(tr_data, te_data, tr_stream, te_stream, ntrain, ntest)``.
    """
    t = time()
    print('LOADING DATASET...')
    path = os.path.join(data_file)
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))

    # Clamp the requested counts (as load_imgs does) so the sequential
    # schemes never ask for indices past the end of a split.
    if ntrain is None:
        ntrain = tr_data.num_examples
    else:
        ntrain = min(ntrain, tr_data.num_examples)
    if ntest is None:
        ntest = te_data.num_examples
    else:
        ntest = min(ntest, te_data.num_examples)

    tr_scheme = SequentialScheme(examples=ntrain, batch_size=batch_size)
    tr_stream = DataStream(tr_data, iteration_scheme=tr_scheme)
    te_scheme = SequentialScheme(examples=ntest, batch_size=batch_size)
    te_stream = DataStream(te_data, iteration_scheme=te_scheme)

    print('name = %s, ntrain = %d, ntest = %d' % (data_file, ntrain, ntest))
    print('%.2f seconds to load data' % (time() - t))
    return tr_data, te_data, tr_stream, te_stream, ntrain, ntest
Example #2
Source File: From CTC-LSTM with Apache License 2.0 | 6 votes |
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    """Build a padded, length-balanced batch stream from ``.npy`` files.

    Loads the ``valid_*`` arrays from `path` when `valid` is true and the
    ``train_*`` arrays otherwise. Returns ``(dataset, stream)``.
    """
    if valid:
        x_name = 'valid_x_raw.npy'
        phn_name = 'valid_phn.npy'
        seq_name = 'valid_seq_to_phn.npy'
    else:
        x_name = 'train_x_raw.npy'
        phn_name = 'train_phn.npy'
        seq_name = 'train_seq_to_phn.npy'

    inputs = numpy.load(os.path.join(path, x_name))
    phonemes = numpy.load(os.path.join(path, phn_name))
    seq_to_phn = numpy.load(os.path.join(path, seq_name))

    # Each row of seq_to_phn gives the [start, stop) rows of `phonemes`
    # belonging to one sequence; column 2 of those rows is the output.
    outputs = [phonemes[span[0]:span[1], 2] for span in seq_to_phn]

    ds = IndexableDataset({'input': inputs, 'output': outputs})
    n_examples = len(inputs)
    stream = DataStream(ds, iteration_scheme=ShuffledExampleScheme(n_examples))

    # Gather several batches' worth of examples, sort them with the
    # balanced-batch comparison, then re-cut into real batches.
    big_batch_scheme = ConstantScheme(batch_size * sort_batch_count)
    stream = Batch(stream, iteration_scheme=big_batch_scheme)
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Unpack(Mapping(stream, SortMapping(comparison)))

    final_scheme = ConstantScheme(batch_size, num_examples=n_examples)
    stream = Batch(stream, iteration_scheme=final_scheme)
    stream = Padding(stream, mask_sources=['input', 'output'])
    return ds, stream
Example #3
Source File: From dl4mt-multi with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_dev_streams(config):
    """Setup development set stream if necessary.

    Builds one ``DataStream`` per computational graph listed in
    ``config['cgs']`` that also has an entry in ``config['val_sets']``.

    Returns
    -------
    dict
        Maps each such cg name to its development ``DataStream``.
    """
    import logging

    log = logging.getLogger(__name__)
    dev_streams = {}
    for cg in config['cgs']:
        if 'val_sets' in config and cg in config['val_sets']:
            log.info('Building development stream for cg:[{}]'.format(cg))
            eid = p_(cg)[0]
            dev_file = config['val_sets'][cg]

            # Get dictionary and fix EOS
            # NOTE(review): unpickling is only safe for trusted vocab files.
            # The context manager closes the handle the original leaked.
            with open(config['src_vocabs'][eid]) as vocab_file:
                dictionary = cPickle.load(vocab_file)
            dictionary['<S>'] = 0
            dictionary['<UNK>'] = config['unk_id']
            dictionary['</S>'] = config['src_eos_idxs'][eid]

            # Get as a text file and convert it into a stream
            dev_dataset = TextFile([dev_file], dictionary, None)
            dev_streams[cg] = DataStream(dev_dataset)
    return dev_streams
Example #4
Source File: From DeepMind-Teaching-Machines-to-Read-and-Comprehend with MIT License | 6 votes |
def setup_datastream(path, vocab_file, config):
    """Build the padded question-answering batch stream.

    Returns ``(dataset, stream)`` where `stream` yields batches of
    ``config.batch_size`` examples with int32 masks for the context,
    question and candidates sources.
    """
    dataset = QADataset(path, vocab_file, config.n_entities,
                        need_sep_token=config.concat_ctx_and_question)
    scheme = QAIterator(path, shuffle=config.shuffle_questions)
    stream = DataStream(dataset, iteration_scheme=scheme)

    if config.concat_ctx_and_question:
        stream = ConcatCtxAndQuestion(stream,
                                      config.concat_question_before,
                                      dataset.reverse_vocab['<SEP>'])

    # Sort sets of multiple batches to make batches of similar sizes
    pool_size = config.batch_size * config.sort_batch_count
    stream = Batch(stream, iteration_scheme=ConstantScheme(pool_size))
    if config.concat_ctx_and_question:
        sort_source = 'question'
    else:
        sort_source = 'context'
    comparison = _balanced_batch_helper(stream.sources.index(sort_source))
    stream = Unpack(Mapping(stream, SortMapping(comparison)))

    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(config.batch_size))
    stream = Padding(stream,
                     mask_sources=['context', 'question', 'candidates'],
                     mask_dtype='int32')
    return dataset, stream
Example #5
Source File: From dcgan_code with MIT License | 6 votes |
def faces(ntrain=None, nval=None, ntest=None, batch_size=128):
    """Open the faces HDF5 file and build train/validation/test streams.

    Any count left as ``None`` falls back to a split size: `ntrain` to the
    train split, `ntest` and `nval` to the test split.

    Returns ``(tr_data, te_data, tr_stream, val_stream, te_stream)``.
    """
    path = os.path.join(data_dir, 'faces_364293_128px.hdf5')
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))

    ntrain = tr_data.num_examples if ntrain is None else ntrain
    ntest = te_data.num_examples if ntest is None else ntest
    nval = te_data.num_examples if nval is None else nval

    tr_stream = DataStream(
        tr_data,
        iteration_scheme=ShuffledScheme(examples=ntrain,
                                        batch_size=batch_size))
    te_stream = DataStream(
        te_data,
        iteration_scheme=SequentialScheme(examples=ntest,
                                          batch_size=batch_size))
    # NOTE(review): the validation stream reads from tr_data while nval
    # defaults to the test split size -- kept as-is; confirm intent.
    val_stream = DataStream(
        tr_data,
        iteration_scheme=SequentialScheme(examples=nval,
                                          batch_size=batch_size))
    return tr_data, te_data, tr_stream, val_stream, te_stream
Example #6
Source File: From iGAN with MIT License | 6 votes |
def load_imgs(ntrain=None, ntest=None, batch_size=128, data_file=None):
    """Open an HDF5 dataset and build shuffled train/test batch streams.

    `ntrain` and `ntest` default to the full split and are capped at the
    split size otherwise.

    Returns ``(tr_data, te_data, tr_stream, te_stream, ntrain, ntest)``.
    """
    start = time()
    print('LOADING DATASET...')
    path = os.path.join(data_file)
    tr_data = H5PYDataset(path, which_sets=('train',))
    te_data = H5PYDataset(path, which_sets=('test',))

    ntrain = (tr_data.num_examples if ntrain is None
              else min(ntrain, tr_data.num_examples))
    ntest = (te_data.num_examples if ntest is None
             else min(ntest, te_data.num_examples))
    print('name = %s, ntrain = %d, ntest = %d' % (data_file, ntrain, ntest))

    tr_stream = DataStream(
        tr_data,
        iteration_scheme=ShuffledScheme(examples=ntrain,
                                        batch_size=batch_size))
    te_stream = DataStream(
        te_data,
        iteration_scheme=ShuffledScheme(examples=ntest,
                                        batch_size=batch_size))
    print('%.2f secs to load data' % (time() - start))
    return tr_data, te_data, tr_stream, te_stream, ntrain, ntest
Example #7
Source File: From attention-lvcsr with MIT License | 5 votes |
def setUp(self):
    """Create two batched streams: one over a range, one over an array."""
    data = range(10)
    # The assignment target was lost in extraction; `self.stream` mirrors
    # the `self.stream_np` attribute set below.
    self.stream = Batch(
        DataStream(IterableDataset(data)),
        iteration_scheme=ConstantScheme(2))
    data_np = numpy.arange(10)
    self.stream_np = Batch(
        DataStream(IterableDataset(data_np)),
        iteration_scheme=ConstantScheme(2))
Example #8
Source File: From fuel with MIT License | 5 votes |
def test_axis_labels_on_produces_examples(self):
    """The stream's axis labels should not include the 'batch' axis."""
    self.dataset.axis_labels = {'data': ('batch', 'features')}
    example_stream = DataStream(self.dataset)
    expected_labels = {'data': ('features',)}
    assert_equal(example_stream.axis_labels, expected_labels)
Example #9
Source File: From fuel with MIT License | 5 votes |
def test_no_axis_labels(self):
    """A stream over ``self.dataset`` should report no axis labels."""
    labels = DataStream(self.dataset).axis_labels
    assert labels is None
Example #10
Source File: From fuel with MIT License | 5 votes |
def test_sources_setter(self):
    """Assigning ``sources`` on a stream should be reflected on read."""
    new_sources = ('features',)
    data_stream = DataStream(self.dataset)
    data_stream.sources = new_sources
    assert_equal(data_stream.sources, ('features',))
Example #11
Source File: From fuel with MIT License | 5 votes |
def test_sources_selection():
    """Streams should yield all sources, or only those selected."""
    features = [5, 6, 7, 1]
    targets = [1, 0, 1, 1]

    # All sources, in insertion order.
    both = OrderedDict([('features', features), ('targets', targets)])
    stream = DataStream(IterableDataset(both))
    assert list(stream.get_epoch_iterator()) == list(zip(features, targets))

    # Only the explicitly requested source.
    restricted = IterableDataset(
        {'features': features, 'targets': targets}, sources=('targets',))
    stream = DataStream(restricted)
    assert list(stream.get_epoch_iterator()) == list(zip(targets))
Example #12
Source File: From fuel with MIT License | 5 votes |
def test_axis_labels_on_produces_batches(self):
    """With a batch scheme the dataset's axis labels pass through as-is."""
    dataset = IndexableDataset(numpy.eye(2))
    labels = {'data': ('batch', 'features')}
    dataset.axis_labels = labels
    batch_stream = DataStream(dataset,
                              iteration_scheme=SequentialScheme(2, 2))
    assert_equal(batch_stream.axis_labels, labels)
Example #13
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_text():
    """Check TextFile streams at word and character level.

    Also pickles an epoch iterator mid-epoch and checks that the restored
    iterator resumes at the same sentence.
    """
    # Test word level and epochs.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        # TextFile is handed these via ``files=``, so keep the file's
        # path rather than the return value of write().
        sentences1 = f.name
        f.write("This is a sentence\n")
        f.write("This another one")
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        sentences2 = f.name
        f.write("More sentences\n")
        f.write("The last one")
    dictionary = {'<UNK>': 0, '</S>': 1, 'this': 2, 'a': 3, 'one': 4}
    text_data = TextFile(files=[sentences1, sentences2],
                         dictionary=dictionary, bos_token=None,
                         preprocess=lower)
    stream = DataStream(text_data)
    epoch = stream.get_epoch_iterator()
    assert len(list(epoch)) == 4
    epoch = stream.get_epoch_iterator()
    # Consume the first three sentences so one remains.
    for sentence in zip(range(3), epoch):
        pass
    f = BytesIO()
    cPickle.dump(epoch, f)
    sentence = next(epoch)
    # Rewind: after dump() the buffer position is at the end, and load()
    # would otherwise hit EOF immediately.
    f.seek(0)
    epoch = cPickle.load(f)
    assert next(epoch) == sentence
    assert_raises(StopIteration, next, epoch)

    # Test character level.
    dictionary = dict([(chr(ord('a') + i), i) for i in range(26)] +
                      [(' ', 26)] + [('<S>', 27)] +
                      [('</S>', 28)] + [('<UNK>', 29)])
    text_data = TextFile(files=[sentences1, sentences2],
                         dictionary=dictionary, preprocess=lower,
                         level="character")
    sentence = next(DataStream(text_data).get_epoch_iterator())[0]
    assert sentence[:3] == [27, 19, 7]
    assert sentence[-3:] == [2, 4, 28]
Example #14
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_in_memory():
    """Check that pickling an epoch over in-memory MNIST skips the data.

    The pickled epoch must stay under 1MB and, once reloaded, must resume
    from the batch following the last one consumed.
    """
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST(('train',), load_in_memory=True)
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    # NOTE(review): `mnist.open()` restores a call lost in extraction; it
    # pairs with the `mnist.close(handle)` below -- confirm against Fuel.
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        # Keep the path: dump() returns None, so it cannot supply one.
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Example #15
Source File: From attention-lvcsr with MIT License | 5 votes |
def get_stream():
    """Return a stream over the MNIST train set: 1500 examples, by 500."""
    dataset = MNIST(('train',))
    scheme = SequentialScheme(1500, 500)
    return DataStream(dataset, iteration_scheme=scheme)
Example #16
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_batchwise(self):
    """ToBytes over a batch stream yields ``string_data`` as one batch."""
    batch_stream = DataStream(dataset=self.dataset,
                              iteration_scheme=SequentialScheme(2, 2))
    decoded_stream = ToBytes(batch_stream)
    expected = [self.string_data]
    # Each item is a 1-tuple; unpack it strictly.
    produced = [item for (item,) in decoded_stream.get_epoch_iterator()]
    assert_equal(expected, produced)
Example #17
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_examplewise(self):
    """ToBytes over an example stream yields ``string_data`` item by item."""
    example_stream = DataStream(dataset=self.dataset,
                                iteration_scheme=SequentialExampleScheme(2))
    decoded_stream = ToBytes(example_stream)
    expected = self.string_data
    # Each item is a 1-tuple; unpack it strictly.
    produced = [item for (item,) in decoded_stream.get_epoch_iterator()]
    assert_equal(expected, produced)
Example #18
Source File: From fuel with MIT License | 5 votes |
def test_mapping(self):
    """Mapping should apply the doubling function to every example."""
    # NOTE(review): the dataset argument was lost in extraction. Given the
    # expected output below, `self.data` is assumed to be
    # [[1, 2, 3], [2, 3, 1], [3, 2, 1]] -- confirm against setUp.
    stream = DataStream(IterableDataset(self.data))
    transformer = Mapping(stream, lambda d: ([2 * i for i in d[0]],))
    assert_equal(list(transformer.get_epoch_iterator()),
                 list(zip([[2, 4, 6], [4, 6, 2], [6, 4, 2]])))
Example #19
Source File: From attention-lvcsr with MIT License | 5 votes |
def setUp(self):
    """Create a labelled batch stream and a Rename transformer over it."""
    # The attribute name was lost in extraction (both the assignment
    # target and Rename's first argument); `self.stream` is restored.
    self.stream = DataStream(
        IndexableDataset(
            OrderedDict([('X', numpy.ones((4, 2, 2))),
                         ('y', numpy.array([0, 1, 0, 1]))]),
            axis_labels={'X': ('batch', 'width', 'height'),
                         'y': ('batch',)}),
        iteration_scheme=SequentialScheme(4, 2))
    self.transformer = Rename(
        self.stream, {'X': 'features', 'y': 'targets'})
Example #20
Source File: From attention-lvcsr with MIT License | 5 votes |
def setUp(self):
    """Create a Mapping transformer that adds one to each of 0..99."""
    numbers = IterableDataset(range(100))
    self.transformer = Mapping(DataStream(numbers),
                               lambda example: (example[0] + 1,))
Example #21
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_value_error_on_different_stream_output_type(self):
    """Merging example streams with a batch stream raises ValueError."""
    spanish_data = IndexableDataset(['Hola mundo!'])
    spanish_stream = DataStream(spanish_data,
                                iteration_scheme=SequentialScheme(2, 2))
    mixed_streams = self.streams + (spanish_stream,)
    source_names = ('english', 'french', 'spanish')
    assert_raises(ValueError, Merge, mixed_streams, source_names)
Example #22
Source File: From attention-lvcsr with MIT License | 5 votes |
def setUp(self):
    """Create example and batch streams in two languages, plus merges."""
    english = DataStream(IterableDataset(['Hello world!']))
    french = DataStream(IterableDataset(['Bonjour le monde!']))
    self.streams = (english, french)

    english_batches = Batch(
        DataStream(IterableDataset(['Hello world!', 'Hi!'])),
        iteration_scheme=ConstantScheme(2))
    french_batches = Batch(
        DataStream(IterableDataset(['Bonjour le monde!', 'Salut!'])),
        iteration_scheme=ConstantScheme(2))
    self.batch_streams = (english_batches, french_batches)

    self.transformer = Merge(self.streams, ('english', 'french'))
    self.batch_transformer = Merge(self.batch_streams,
                                   ('english', 'french'))
Example #23
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_value_error_on_example_stream(self):
    """Padding should refuse a stream that yields single examples."""
    ragged = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    example_stream = DataStream(IterableDataset(ragged))
    assert_raises(ValueError, Padding, example_stream)
Example #24
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_mask_sources(self):
    """Masking only 'features' gives three outputs per batch."""
    ragged = OrderedDict([
        ('features', [[1], [2, 3]]),
        ('targets', [[4, 5, 6], [7]])])
    batches = Batch(DataStream(IterableDataset(ragged)), ConstantScheme(2))
    transformer = Padding(batches, mask_sources=('features',))
    first_batch = next(transformer.get_epoch_iterator())
    assert_equal(len(first_batch), 3)
Example #25
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_mask_dtype(self):
    """The mask produced by Padding should honour ``mask_dtype``."""
    ragged = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    batches = Batch(DataStream(IterableDataset(ragged)), ConstantScheme(2))
    transformer = Padding(batches, mask_dtype='uint8')
    first_batch = next(transformer.get_epoch_iterator())
    assert_equal(str(first_batch[1].dtype), 'uint8')
Example #26
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_two_sources(self):
    """Padding two sources gives four outputs per batch."""
    ragged = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    batches = Batch(DataStream(IterableDataset(ragged)), ConstantScheme(2))
    first_batch = next(Padding(batches).get_epoch_iterator())
    assert len(first_batch) == 4
Example #27
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_2d_sequences_error_on_unequal_shapes(self):
    """Padding raises ValueError when trailing dimensions differ."""
    wide = numpy.ones((3, 4))
    narrow = 2 * numpy.ones((2, 3))
    batches = Batch(DataStream(IterableDataset([wide, narrow])),
                    ConstantScheme(2))
    epoch_iterator = Padding(batches).get_epoch_iterator()
    assert_raises(ValueError, next, epoch_iterator)
Example #28
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_2d_sequences(self):
    """Padding pads 2D sequences along the first axis and masks the pad."""
    long_seq = numpy.ones((3, 4))
    short_seq = 2 * numpy.ones((2, 4))
    batches = Batch(DataStream(IterableDataset([long_seq, short_seq])),
                    ConstantScheme(2))
    data, mask = next(Padding(batches).get_epoch_iterator())

    assert data.shape == (2, 3, 4)
    assert (data[0, :, :] == 1).all()
    assert (data[1, :2, :] == 2).all()
    expected_mask = numpy.array([[1, 1, 1], [1, 1, 0]])
    assert (mask == expected_mask).all()
Example #29
Source File: From attention-lvcsr with MIT License | 5 votes |
def test_value_error_on_example_stream(self):
    """Unpack should refuse a stream that yields single examples."""
    ragged = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    example_stream = DataStream(IterableDataset(ragged))
    assert_raises(ValueError, Unpack, example_stream)
Example #30
Source File: From fuel with MIT License | 5 votes |
def test_mapping_accepts_list_or_dict(self):
    """Mapping should reject a ``mapping_accepts`` other than list/dict."""
    def mapping(d):
        return [2 * i for i in d[0]],

    # NOTE(review): the dataset argument was lost in extraction;
    # `self.data` is assumed to be the fixture set up for this test
    # class -- confirm against setUp.
    stream = DataStream(IterableDataset(self.data))
    assert_raises(ValueError,
                  lambda: Mapping(stream, mapping, mapping_accepts=int))