Python fuel.datasets.IndexableDataset() Examples
The following are 30 code examples of fuel.datasets.IndexableDataset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module fuel.datasets, or try the search function.
Example #1
Source File: timit.py From CTC-LSTM with Apache License 2.0 | 6 votes |
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    """Build the dataset and the padded, length-sorted batch stream.

    Three ``.npy`` files are loaded from *path*; the ``valid_*`` files are
    used when *valid* is truthy, the ``train_*`` files otherwise.

    Returns a ``(dataset, stream)`` pair.
    """
    if valid:
        file_names = ('valid_x_raw.npy', 'valid_phn.npy',
                      'valid_seq_to_phn.npy')
    else:
        file_names = ('train_x_raw.npy', 'train_phn.npy',
                      'train_seq_to_phn.npy')
    inputs, phonemes, seq_bounds = (
        numpy.load(os.path.join(path, name)) for name in file_names)

    # One target per sequence: column 2 of the phoneme rows delimited by
    # the first two entries of each row in the sequence table.
    targets = []
    for bounds in seq_bounds:
        targets.append(phonemes[bounds[0]:bounds[1], 2])

    ds = IndexableDataset({'input': inputs, 'output': targets})
    num_examples = len(inputs)

    # Shuffle examples, gather them in large chunks, sort each chunk with
    # the balanced-batch helper, then cut the chunks into final batches.
    stream = DataStream(
        ds, iteration_scheme=ShuffledExampleScheme(num_examples))
    chunk_scheme = ConstantScheme(batch_size * sort_batch_count)
    stream = Batch(stream, iteration_scheme=chunk_scheme)
    input_index = stream.sources.index('input')
    stream = Mapping(stream, SortMapping(_balanced_batch_helper(input_index)))
    stream = Unpack(stream)
    batch_scheme = ConstantScheme(batch_size, num_examples=num_examples)
    stream = Batch(stream, iteration_scheme=batch_scheme)
    stream = Padding(stream, mask_sources=['input', 'output'])
    return ds, stream
Example #2
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def setUp(self):
    """Create a batch stream over sources 'X'/'y' and a Rename wrapper."""
    indexables = OrderedDict()
    indexables['X'] = numpy.ones((4, 2, 2))
    indexables['y'] = numpy.array([0, 1, 0, 1])
    labels = {'X': ('batch', 'width', 'height'), 'y': ('batch',)}
    dataset = IndexableDataset(indexables, axis_labels=labels)
    self.stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(4, 2))
    # Rename 'X' -> 'features' and 'y' -> 'targets'.
    new_names = {'X': 'features', 'y': 'targets'}
    self.transformer = Rename(self.stream, new_names)
Example #3
Source File: test_datasets.py From fuel with MIT License | 5 votes |
def test_batch_iteration_scheme_with_lists(self):
    """Batch iteration schemes must also work when sources are lists."""
    foo_values = list(range(50))
    bar_values = list(range(1, 51))
    data = IndexableDataset(
        OrderedDict([('foo', foo_values), ('bar', bar_values)]))
    scheme = ShuffledScheme(data.num_examples, 5)
    stream = DataStream(data, iteration_scheme=scheme)

    # Regroup the epoch per source and concatenate each source's batches.
    all_batches = list(stream.get_epoch_iterator())
    returned = []
    for batches in zip(*all_batches):
        returned.append(sum(batches, []))

    assert set(returned[0]) == set(range(50))
    assert set(returned[1]) == set(range(1, 51))
Example #4
Source File: test_streams.py From fuel with MIT License | 5 votes |
def test_axis_labels_on_produces_batches(self):
    """A batch stream exposes the axis labels set on its dataset."""
    expected_labels = {'data': ('batch', 'features')}
    dataset = IndexableDataset(numpy.eye(2))
    dataset.axis_labels = expected_labels
    scheme = SequentialScheme(2, 2)
    stream = DataStream(dataset, iteration_scheme=scheme)
    assert_equal(stream.axis_labels, expected_labels)
Example #5
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_flatten_batches(self):
    """Flatten on a batch stream yields two batches of flattened features."""
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features',))
    produced = list(wrapper.get_epoch_iterator())
    # Both batches are expected to carry the same values.
    expected = [(numpy.ones((2, 4)), numpy.array([[0], [1]]))
                for _ in range(2)]
    assert_equal(produced, expected)
Example #6
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_axis_labels_on_flatten_batches(self):
    """Flatten turns the 'features' labels into ('batch', 'feature')."""
    input_labels = {'features': ('batch', 'width', 'height'),
                    'targets': ('batch', 'index')}
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2),
                        axis_labels=input_labels)
    wrapper = Flatten(stream, which_sources=('features',))
    expected_labels = {'features': ('batch', 'feature'),
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)
Example #7
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_axis_labels_on_flatten_batches_with_none(self):
    """Flatten keeps a ``None`` axis label for 'features' unchanged."""
    input_labels = {'features': None, 'targets': ('batch', 'index')}
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2),
                        axis_labels=input_labels)
    wrapper = Flatten(stream, which_sources=('features',))
    expected_labels = {'features': None, 'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)
Example #8
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_flatten_examples(self):
    """Flatten on an example stream yields flattened single examples."""
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialExampleScheme(4))
    wrapper = Flatten(stream, which_sources=('features',))
    produced = list(wrapper.get_epoch_iterator())
    # Four examples expected: the (target 0, target 1) pair, twice.
    expected = [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2
    assert_equal(produced, expected)
Example #9
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_filter_batches(self):
    """Filter keeps only batches whose first element is divisible by 3."""

    def first_divisible_by_three(batch):
        return batch[0][0] % 3 == 0

    data = [1, 2, 3, 4]
    stream = DataStream(IndexableDataset(data),
                        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Filter(stream, first_divisible_by_three)
    # Only the second batch starts with a multiple of three.
    data_filtered = [([3, 4],)]
    assert_equal(list(wrapper.get_epoch_iterator()), data_filtered)
Example #10
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_axis_labels_are_passed_through(self):
    """Filter exposes the same axis labels as the stream it wraps."""

    def first_divisible_by_three(batch):
        return batch[0][0] % 3 == 0

    dataset = IndexableDataset({'features': [1, 2, 3, 4]},
                               axis_labels={'features': ('batch',)})
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))
    wrapper = Filter(stream, first_divisible_by_three)
    assert_equal(wrapper.axis_labels, stream.axis_labels)
Example #11
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_value_error_on_batch_stream(self):
    """Wrapping a batch stream in Batch must raise ValueError."""
    dataset = IndexableDataset([1, 2, 3, 4])
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))
    rebatch_scheme = SequentialScheme(4, 2)
    assert_raises(ValueError, Batch, stream, rebatch_scheme)
Example #12
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def test_value_error_on_different_stream_output_type(self):
    """Merging the fixture streams with a batch stream raises ValueError."""
    spanish_dataset = IndexableDataset(['Hola mundo!'])
    spanish_stream = DataStream(spanish_dataset,
                                iteration_scheme=SequentialScheme(2, 2))
    all_streams = self.streams + (spanish_stream,)
    source_names = ('english', 'french', 'spanish')
    assert_raises(ValueError, Merge, all_streams, source_names)
Example #13
Source File: test_datasets.py From fuel with MIT License | 5 votes |
def test_pickling(self):
    """An IndexableDataset survives a pickle dump/load round trip."""
    dataset = IndexableDataset({'a': (1, 2)})
    serialized = cPickle.dumps(dataset)
    cPickle.loads(serialized)
Example #14
Source File: test_transformers.py From fuel with MIT License | 5 votes |
def setUp(self):
    """Create a dataset whose 'words' source holds byte strings as uint8.

    ``numpy.fromstring`` on binary data is deprecated since NumPy 1.14, so
    the arrays are built with ``numpy.frombuffer`` instead.  The ``copy()``
    keeps each array writable and independent of the source ``bytes``
    object, as the ``fromstring`` result was.
    """
    self.string_data = [b'Hello', b'World!']
    words = [numpy.frombuffer(s, dtype='uint8').copy()
             for s in self.string_data]
    self.dataset = IndexableDataset(
        indexables={'words': words},
        axis_labels={'words': ('batch', 'bytes')})
Example #15
Source File: test_predict.py From blocks-extras with MIT License | 5 votes |
def test_predict():
    """Check that PredictDataStream saves the predictions to an .npz file.

    A four-example dataset is streamed in batches of two through a "network"
    that adds 1 to its input.  After one epoch the saved array must equal
    ``source + 1``.  The temporary file is removed whether or not the run or
    the assertion succeeds.
    """
    tempfile_path = os.path.join(gettempdir(), 'test_predict.npz')

    # set up mock datastream
    source = [[1], [2], [3], [4]]
    dataset = IndexableDataset(OrderedDict([('input', source)]))
    scheme = SequentialScheme(dataset.num_examples, batch_size=2)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    # simulate small "network" that increments the input by 1
    input_tensor = tensor.matrix('input')
    output_tensor = input_tensor + 1
    output_tensor.name = 'output_tensor'

    main_loop = MockMainLoop(extensions=[
        PredictDataStream(data_stream=data_stream,
                          variables=[output_tensor],
                          path=tempfile_path,
                          after_training=True),
        FinishAfter(after_n_epochs=1)
    ])

    try:
        main_loop.run()

        # assert resulting prediction is saved; the archive is closed before
        # the file is deleted so removal also works where open files are
        # locked.
        with numpy.load(tempfile_path) as prediction:
            assert numpy.all(
                prediction[output_tensor.name] == numpy.array(source) + 1)
    finally:
        try:
            os.remove(tempfile_path)
        except OSError:
            # Best-effort cleanup: the file may never have been written.
            pass
Example #16
Source File: weibo_vest.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data*.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #17
Source File: bst_vest.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data*.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #18
Source File: syn_vest.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data*.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #19
Source File: lcsts_test.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data*.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #20
Source File: lcsts_vest.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data*.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #21
Source File: syntest.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap five entries of *data* in a shuffled fuel dataset.

    The sources are, in order: 'source', 'target', 'target_c', 'rule_id'
    and 'rule'.  Returns an ``IndexableDataset`` whose example iteration
    scheme is a ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c', 'rule_id', 'rule'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #22
Source File: copynet.py From CopyNet with MIT License | 5 votes |
def build_data(source, target):
    """Wrap *source* and *target* in a shuffled fuel dataset.

    Returns a ``(dataset, len(source))`` pair; the dataset's example
    iteration scheme is a ``ShuffledExampleScheme`` over all its examples.
    """
    columns = OrderedDict()
    columns['source'] = source
    columns['target'] = target
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset, len(source)
Example #23
Source File: lcsts_sample.py From CopyNet with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data*.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    columns = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #24
Source File: build_dataset.py From CopyNet with MIT License | 5 votes |
def build_fuel(data):
    """Wrap *data* as the single 'data' source of a shuffled fuel dataset.

    Returns a ``(dataset, len(data))`` pair; the dataset's example
    iteration scheme is a ``ShuffledExampleScheme`` over all its examples.
    """
    columns = OrderedDict()
    columns['data'] = data
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset, len(data)
Example #25
Source File: test_transformers.py From attention-lvcsr with MIT License | 5 votes |
def test_axis_labels_on_flatten_batches_with_none(self):
    """Flatten keeps a ``None`` axis label for 'features' unchanged."""
    input_labels = {'features': None, 'targets': ('batch', 'index')}
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2),
                        axis_labels=input_labels)
    wrapper = Flatten(stream, which_sources=('features',))
    expected_labels = {'features': None, 'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)
Example #26
Source File: keyphrase_copynet.py From seq2seq-keyphrase with MIT License | 5 votes |
def build_data(data):
    """Wrap the 'source' and 'target' entries of *data* in a fuel dataset.

    Returns an ``IndexableDataset`` whose example iteration scheme is a
    ``ShuffledExampleScheme`` over all of its examples.
    """
    # NOTE: the 'target_c' column is left out of this dataset.
    columns = OrderedDict()
    for key in ('source', 'target'):
        columns[key] = data[key]
    dataset = datasets.IndexableDataset(indexables=columns)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #27
Source File: test_aggregation.py From attention-lvcsr with MIT License | 5 votes |
def test_mean_aggregator():
    """Check Mean aggregation of a vector and a scalar over two batches."""
    float_type = theano.config.floatX
    num_examples, batch_size = 4, 2

    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=float_type)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    scheme = SequentialScheme(num_examples, batch_size)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    # 'y': per-column mean of the squared inputs; 'z': sum of 'y'.
    inputs = tensor.matrix('features')
    squared_mean = (inputs ** 2).mean(axis=0)
    squared_mean.name = 'y'
    total = squared_mean.sum()
    total.name = 'z'
    squared_mean.tag.aggregation_scheme = Mean(squared_mean, 1.)
    total.tag.aggregation_scheme = Mean(total, 1.)

    y_result = DatasetEvaluator([squared_mean]).evaluate(data_stream)['y']
    assert_allclose(y_result, numpy.array([8.25, 26.75], dtype=float_type))
    z_result = DatasetEvaluator([total]).evaluate(data_stream)['z']
    assert_allclose(z_result, numpy.array([35], dtype=float_type))
Example #28
Source File: test_datasets.py From attention-lvcsr with MIT License | 5 votes |
def test_getattr(self):
    """A source of an IndexableDataset is reachable as an attribute."""
    dataset = IndexableDataset({'a': (1, 2)})
    value = getattr(dataset, 'a')
    assert_equal(value, (1, 2))
Example #29
Source File: test_datasets.py From attention-lvcsr with MIT License | 5 votes |
def test_value_error_get_data_state(self):
    """get_data called with state ``True`` must raise ValueError."""
    dataset = IndexableDataset([1, 2, 3])
    state, request = True, [1, 2]
    assert_raises(ValueError, dataset.get_data, state, request)
Example #30
Source File: test_datasets.py From attention-lvcsr with MIT License | 5 votes |
def test_value_error_get_data_none_request(self):
    """get_data called with a ``None`` request must raise ValueError."""
    dataset = IndexableDataset([1, 2, 3])
    state, request = None, None
    assert_raises(ValueError, dataset.get_data, state, request)